Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into es-111798-fix
Browse files Browse the repository at this point in the history
  • Loading branch information
ywangd committed Nov 15, 2024
2 parents 159edfd + 87c2495 commit 97edf5e
Show file tree
Hide file tree
Showing 319 changed files with 10,451 additions and 2,309 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
package org.elasticsearch.benchmark.index.mapper;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.util.Accountable;
import org.elasticsearch.TransportVersion;
import org.elasticsearch.cluster.ClusterModule;
import org.elasticsearch.cluster.metadata.IndexMetadata;
Expand All @@ -28,7 +27,6 @@
import org.elasticsearch.index.mapper.MapperRegistry;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.ProvidedIdFieldMapper;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.index.similarity.SimilarityService;
import org.elasticsearch.indices.IndicesModule;
import org.elasticsearch.script.Script;
Expand Down Expand Up @@ -56,13 +54,7 @@ public static MapperService create(String mappings) {
MapperRegistry mapperRegistry = new IndicesModule(Collections.emptyList()).getMapperRegistry();

SimilarityService similarityService = new SimilarityService(indexSettings, null, Map.of());
BitsetFilterCache bitsetFilterCache = new BitsetFilterCache(indexSettings, new BitsetFilterCache.Listener() {
@Override
public void onCache(ShardId shardId, Accountable accountable) {}

@Override
public void onRemoval(ShardId shardId, Accountable accountable) {}
});
BitsetFilterCache bitsetFilterCache = new BitsetFilterCache(indexSettings, BitsetFilterCache.Listener.NOOP);
MapperService mapperService = new MapperService(
() -> TransportVersion.current(),
indexSettings,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ java.time.zone.ZoneRules#getStandardOffset(java.time.Instant)
java.time.zone.ZoneRules#getDaylightSavings(java.time.Instant)
java.time.zone.ZoneRules#isDaylightSavings(java.time.Instant)

@defaultMessage Use logger methods with non-Object parameter
@defaultMessage The first parameter to a log4j log statement should be a String, a log4j Supplier (not java.util.function.Supplier), or another object that log4j supports.
org.apache.logging.log4j.Logger#trace(java.lang.Object)
org.apache.logging.log4j.Logger#trace(java.lang.Object, java.lang.Throwable)
org.apache.logging.log4j.Logger#debug(java.lang.Object)
Expand Down
2 changes: 1 addition & 1 deletion build-tools-internal/version.properties
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ log4j = 2.19.0
slf4j = 2.0.6
ecsLogging = 1.2.0
jna = 5.12.1
netty = 4.1.109.Final
netty = 4.1.115.Final
commons_lang3 = 3.9
google_oauth_client = 1.34.1

Expand Down
7 changes: 5 additions & 2 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -420,8 +420,11 @@ gradle.projectsEvaluated {
}
}

tasks.named("validateChangelogs") {
onlyIf { project.gradle.startParameter.taskNames.any { it.startsWith("checkPart") || it == 'functionalTests' } == false }
// Skip changelog validation when the build was invoked for a partial check
// (any requested task whose name starts with "checkPart") or for 'functionalTests'.
tasks.named("validateChangelogs").configure {
// Read the requested task names once, while the task is being configured, and
// capture them in a local so the onlyIf predicate below only closes over this list.
// NOTE(review): presumably done so the predicate no longer touches `project` at
// execution time (configuration-cache friendliness) — confirm.
def triggeredTaskNames = gradle.startParameter.taskNames
onlyIf {
// Run only if none of the requested tasks is a checkPart* task or 'functionalTests'.
triggeredTaskNames.any { it.startsWith("checkPart") || it == 'functionalTests' } == false
}
}

tasks.named("precommit") {
Expand Down
5 changes: 5 additions & 0 deletions docs/changelog/113194.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 113194
summary: Add Search Phase APM metrics
area: Search
type: enhancement
issues: []
14 changes: 14 additions & 0 deletions docs/changelog/114202.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
pr: 114202
summary: Remove deprecated `xpack.searchable.snapshot.allocate_on_rolling_restart` setting
area: Snapshot/Restore
type: breaking
issues: []
breaking:
title: Remove deprecated `xpack.searchable.snapshot.allocate_on_rolling_restart` setting
area: 'Cluster and node setting'
details: >-
The `xpack.searchable.snapshot.allocate_on_rolling_restart` setting was created as an escape-hatch just in case
relying on the `cluster.routing.allocation.enable=primaries` setting for allocating searchable snapshots during
rolling restarts had some unintended side-effects. It has been deprecated since 8.2.0.
impact: Remove `xpack.searchable.snapshot.allocate_on_rolling_restart` from your settings if present.
notable: false
6 changes: 6 additions & 0 deletions docs/changelog/115142.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 115142
summary: Attempt to clean up index before remote transfer
area: Recovery
type: enhancement
issues:
- 104473
5 changes: 5 additions & 0 deletions docs/changelog/115678.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 115678
summary: "ESQL: extract common filter from aggs"
area: ES|QL
type: enhancement
issues: []
5 changes: 5 additions & 0 deletions docs/changelog/115687.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 115687
summary: Add default ILM policies and switch to ILM for apm-data plugin
area: Data streams
type: feature
issues: []
6 changes: 6 additions & 0 deletions docs/changelog/115814.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 115814
summary: "[ES|QL] Implicit casting string literal to intervals"
area: ES|QL
type: enhancement
issues:
- 115352
5 changes: 5 additions & 0 deletions docs/changelog/115858.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 115858
summary: "ESQL: optimise aggregations filtered by false/null into evals"
area: ES|QL
type: enhancement
issues: []
14 changes: 14 additions & 0 deletions docs/changelog/116077.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
pr: 116077
summary: Remove `ecs` option on `user_agent` processor
area: Ingest Node
type: breaking
issues: []
breaking:
title: Remove `ecs` option on `user_agent` processor
area: Ingest
details: >-
The `user_agent` ingest processor no longer accepts the `ecs` option. (It was previously deprecated and ignored.)
impact: >-
Users should stop using the `ecs` option when creating instances of the `user_agent` ingest processor.
The option will be removed from existing processors stored in the cluster state on upgrade.
notable: false
5 changes: 5 additions & 0 deletions docs/changelog/116339.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 116339
summary: "Index stats enhancement: creation date and `tier_preference`"
area: Stats
type: feature
issues: []
5 changes: 5 additions & 0 deletions docs/changelog/116348.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 116348
summary: "ESQL: Honor skip_unavailable setting for nonmatching indices errors at planning time"
area: ES|QL
type: enhancement
issues: [ 114531 ]
5 changes: 5 additions & 0 deletions docs/changelog/116357.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 116357
summary: Add tracking for query rule types
area: Relevance
type: enhancement
issues: []
6 changes: 6 additions & 0 deletions docs/changelog/116408.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 116408
summary: Propagating nested `inner_hits` to the parent compound retriever
area: Ranking
type: bug
issues:
- 116397
6 changes: 6 additions & 0 deletions docs/changelog/116656.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 116656
summary: _validate does not honour ignore_unavailable
area: Search
type: bug
issues:
- 116594

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/reference/esql/functions/kibana/docs/bit_length.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion docs/reference/esql/functions/kibana/docs/byte_length.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion docs/reference/esql/functions/kibana/docs/length.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

40 changes: 26 additions & 14 deletions docs/reference/esql/implicit-casting.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
<titleabbrev>Implicit casting</titleabbrev>
++++

Often users will input `datetime`, `ip`, `version`, or geospatial objects as simple strings in their queries for use in predicates, functions, or expressions. {esql} provides <<esql-type-conversion-functions, type conversion functions>> to explicitly convert these strings into the desired data types.
Often users will input `date`, `ip`, `version`, `date_period` or `time_duration` as simple strings in their queries for use in predicates, functions, or expressions. {esql} provides <<esql-type-conversion-functions, type conversion functions>> to explicitly convert these strings into the desired data types.

Without implicit casting users must explicitly code these `to_X` functions in their queries, when string literals don't match the target data types they are assigned or compared to. Here is an example of using `to_datetime` to explicitly perform a data type conversion.

Expand All @@ -18,7 +18,7 @@ FROM employees
| LIMIT 1
----

Implicit casting improves usability, by automatically converting string literals to the target data type. This is most useful when the target data type is `datetime`, `ip`, `version` or a geo spatial. It is natural to specify these as a string in queries.
Implicit casting improves usability by automatically converting string literals to the target data type. This is most useful when the target data type is `date`, `ip`, `version`, `date_period` or `time_duration`. It is natural to specify these as strings in queries.

The first query can be coded without calling the `to_datetime` function, as follows:

Expand All @@ -38,16 +38,28 @@ The following table details which {esql} operations support implicit casting for

[%header.monospaced.styled,format=dsv,separator=|]
|===
||ScalarFunction|BinaryComparison|ArithmeticOperation|InListPredicate|AggregateFunction
|DATETIME|Y|Y|Y|Y|N
|DOUBLE|Y|N|N|N|N
|LONG|Y|N|N|N|N
|INTEGER|Y|N|N|N|N
|IP|Y|Y|Y|Y|N
|VERSION|Y|Y|Y|Y|N
|GEO_POINT|Y|N|N|N|N
|GEO_SHAPE|Y|N|N|N|N
|CARTESIAN_POINT|Y|N|N|N|N
|CARTESIAN_SHAPE|Y|N|N|N|N
|BOOLEAN|Y|Y|Y|Y|N
||ScalarFunction*|Operator*|<<esql-group-functions, GroupingFunction>>|<<esql-agg-functions, AggregateFunction>>
|DATE|Y|Y|Y|N
|IP|Y|Y|Y|N
|VERSION|Y|Y|Y|N
|BOOLEAN|Y|Y|Y|N
|DATE_PERIOD/TIME_DURATION|Y|N|Y|N
|===

ScalarFunction* includes:

<<esql-conditional-functions-and-expressions, Conditional Functions and Expressions>>

<<esql-date-time-functions, Date and Time Functions>>

<<esql-ip-functions, IP Functions>>


Operator* includes:

<<esql-binary-operators, Binary Operators>>

<<esql-unary-operators, Unary Operators>>

<<esql-in-operator, IN>>

8 changes: 7 additions & 1 deletion docs/reference/inference/service-elser.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@ You can also deploy ELSER by using the <<infer-service-elasticsearch>>.
NOTE: The API request will automatically download and deploy the ELSER model if
it isn't already downloaded.

[WARNING]
.Deprecated in 8.16
====
The `elser` service is deprecated and will be removed in a future release.
Use the <<infer-service-elasticsearch>> instead, with `model_id` included in the `service_settings`.
====

[discrete]
[[infer-service-elser-api-request]]
Expand Down Expand Up @@ -173,4 +179,4 @@ PUT _inference/sparse_embedding/my-elser-model
}
}
------------------------------------------------------------
// TEST[skip:TBD]
// TEST[skip:TBD]
15 changes: 6 additions & 9 deletions docs/reference/mapping/types/dense-vector.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -117,12 +117,10 @@ that sacrifices result accuracy for improved speed.
The `dense_vector` type supports quantization to reduce the memory footprint required when <<approximate-knn, searching>> `float` vectors.
The following three quantization strategies are supported:

+
--
`int8` - Quantizes each dimension of the vector to 1-byte integers. This reduces the memory footprint by 75% (or 4x) at the cost of some accuracy.
`int4` - Quantizes each dimension of the vector to half-byte integers. This reduces the memory footprint by 87% (or 8x) at the cost of accuracy.
`bbq` - experimental:[] Better binary quantization which reduces each dimension to a single bit precision. This reduces the memory footprint by 96% (or 32x) at a larger cost of accuracy. Generally, oversampling during query time and reranking can help mitigate the accuracy loss.
--
* `int8` - Quantizes each dimension of the vector to 1-byte integers. This reduces the memory footprint by 75% (or 4x) at the cost of some accuracy.
* `int4` - Quantizes each dimension of the vector to half-byte integers. This reduces the memory footprint by 87% (or 8x) at the cost of accuracy.
* `bbq` - experimental:[] Better binary quantization, which reduces each dimension to single-bit precision. This reduces the memory footprint by 96% (or 32x) at a larger cost of accuracy. Generally, oversampling during query time and reranking can help mitigate the accuracy loss.


When using a quantized format, you may want to oversample and rescore the results to improve accuracy. See <<dense-vector-knn-search-reranking, oversampling and rescoring>> for more information.

Expand Down Expand Up @@ -245,12 +243,11 @@ their vector field's similarity to the query vector. The `_score` of each
document will be derived from the similarity, in a way that ensures scores are
positive and that a larger score corresponds to a higher ranking.
Defaults to `l2_norm` when `element_type: bit`, otherwise defaults to `cosine`.

NOTE: `bit` vectors only support `l2_norm` as their similarity metric.

+
^*^ This parameter can only be specified when `index` is `true`.
+
NOTE: `bit` vectors only support `l2_norm` as their similarity metric.

.Valid values for `similarity`
[%collapsible%open]
====
Expand Down
5 changes: 2 additions & 3 deletions docs/reference/mapping/types/semantic-text.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -87,16 +87,15 @@ Trying to <<delete-inference-api,delete an {infer} endpoint>> that is used on a

[discrete]
[[auto-text-chunking]]
==== Automatic text chunking
==== Text chunking

{infer-cap} endpoints have a limit on the amount of text they can process.
To allow for large amounts of text to be used in semantic search, `semantic_text` automatically generates smaller passages if needed, called _chunks_.

Each chunk will include the text subpassage and the corresponding embedding generated from it.
When querying, the individual passages will be automatically searched for each document, and the most relevant passage will be used to compute a score.

Documents are split into 250-word sections with a 100-word overlap so that each section shares 100 words with the previous section.
This overlap ensures continuity and prevents vital contextual information in the input text from being lost by a hard break.
For more details on chunking and how to configure chunking settings, see <<infer-chunking-config, Configuring chunking>> in the Inference API documentation.


[discrete]
Expand Down
12 changes: 11 additions & 1 deletion docs/reference/query-rules/apis/list-query-rulesets.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ PUT _query_rules/ruleset-3
},
{
"rule_id": "rule-3",
"type": "pinned",
"type": "exclude",
"criteria": [
{
"type": "fuzzy",
Expand Down Expand Up @@ -178,6 +178,9 @@ A sample response:
"rule_total_count": 1,
"rule_criteria_types_counts": {
"exact": 1
},
"rule_type_counts": {
"pinned": 1
}
},
{
Expand All @@ -186,6 +189,9 @@ A sample response:
"rule_criteria_types_counts": {
"exact": 1,
"fuzzy": 1
},
"rule_type_counts": {
"pinned": 2
}
},
{
Expand All @@ -194,6 +200,10 @@ A sample response:
"rule_criteria_types_counts": {
"exact": 1,
"fuzzy": 2
},
"rule_type_counts": {
"pinned": 2,
"exclude": 1
}
}
]
Expand Down
Loading

0 comments on commit 97edf5e

Please sign in to comment.