diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/script/ScriptScoreBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/script/ScriptScoreBenchmark.java index b2094084fec30..d0308d2166bfa 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/script/ScriptScoreBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/script/ScriptScoreBenchmark.java @@ -79,7 +79,7 @@ public class ScriptScoreBenchmark { private final ScriptModule scriptModule = new ScriptModule(Settings.EMPTY, pluginsService.filterPlugins(ScriptPlugin.class)); private final Map fieldTypes = Map.ofEntries( - Map.entry("n", new NumberFieldType("n", NumberType.LONG, false, false, true, true, null, Map.of())) + Map.entry("n", new NumberFieldType("n", NumberType.LONG, false, false, true, true, null, Map.of(), null)) ); private final IndexFieldDataCache fieldDataCache = new IndexFieldDataCache.None(); private final CircuitBreakerService breakerService = new NoneCircuitBreakerService(); diff --git a/docs/reference/mapping/types/numeric.asciidoc b/docs/reference/mapping/types/numeric.asciidoc index c03554ec142f0..b0377eeec9a1e 100644 --- a/docs/reference/mapping/types/numeric.asciidoc +++ b/docs/reference/mapping/types/numeric.asciidoc @@ -117,6 +117,7 @@ The following parameters are accepted by numeric types: Try to convert strings to numbers and truncate fractions for integers. Accepts `true` (default) and `false`. Not applicable for `unsigned_long`. + Note that this cannot be set if the `script` parameter is used. <>:: @@ -127,7 +128,8 @@ The following parameters are accepted by numeric types: <>:: If `true`, malformed numbers are ignored. If `false` (default), malformed - numbers throw an exception and reject the whole document. + numbers throw an exception and reject the whole document. Note that this + cannot be set if the `script` parameter is used. 
<>:: @@ -137,7 +139,26 @@ The following parameters are accepted by numeric types: Accepts a numeric value of the same `type` as the field which is substituted for any explicit `null` values. Defaults to `null`, which - means the field is treated as missing. + means the field is treated as missing. Note that this cannot be set + if the `script` parameter is used. + +`on_script_error`:: + + Defines what to do if the script defined by the `script` parameter + throws an error at indexing time. Accepts `reject` (default), which + will cause the entire document to be rejected, and `ignore`, which + will register the field in the document's + <> metadata field and continue + indexing. This parameter can only be set if the `script` field is + also set. + +`script`:: + + If this parameter is set, then the field will index values generated + by this script, rather than reading the values directly from the + source. Scripts are in the same format as their + <>. Scripts can only be + configured on `long` and `double` field types. 
<>:: diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/TokenCountFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/TokenCountFieldMapper.java index 13d36eb2d3217..3d563d97aa18a 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/TokenCountFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/TokenCountFieldMapper.java @@ -77,7 +77,7 @@ static class TokenCountFieldType extends NumberFieldMapper.NumberFieldType { TokenCountFieldType(String name, boolean isSearchable, boolean isStored, boolean hasDocValues, Number nullValue, Map meta) { - super(name, NumberFieldMapper.NumberType.INTEGER, isSearchable, isStored, hasDocValues, false, nullValue, meta); + super(name, NumberFieldMapper.NumberType.INTEGER, isSearchable, isStored, hasDocValues, false, nullValue, meta, null); } @Override diff --git a/modules/runtime-fields-common/src/yamlRestTest/resources/rest-api-spec/test/runtime_fields/23_long_calculated_at_index.yml b/modules/runtime-fields-common/src/yamlRestTest/resources/rest-api-spec/test/runtime_fields/23_long_calculated_at_index.yml new file mode 100644 index 0000000000000..a6eff621344c1 --- /dev/null +++ b/modules/runtime-fields-common/src/yamlRestTest/resources/rest-api-spec/test/runtime_fields/23_long_calculated_at_index.yml @@ -0,0 +1,152 @@ +--- +setup: + - do: + indices.create: + index: sensor + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + timestamp: + type: date + temperature: + type: long + voltage: + type: double + node: + type: keyword + voltage_times_ten: + type: long + script: + source: | + for (double v : doc['voltage']) { + emit((long)(v * params.multiplier)); + } + params: + multiplier: 10 + voltage_times_ten_no_dv: + type: long + doc_values: false + script: + source: | + for (double v : doc['voltage']) { + emit((long)(v * params.multiplier)); + } + params: + multiplier: 10 + 
# test multiple values + temperature_digits: + type: long + script: + source: | + for (long temperature : doc['temperature']) { + long t = temperature; + while (t != 0) { + emit(t % 10); + t /= 10; + } + } + + - do: + bulk: + index: sensor + refresh: true + body: | + {"index":{}} + {"timestamp": 1516729294000, "temperature": 200, "voltage": 5.2, "node": "a"} + {"index":{}} + {"timestamp": 1516642894000, "temperature": 201, "voltage": 5.8, "node": "b"} + {"index":{}} + {"timestamp": 1516556494000, "temperature": 202, "voltage": 5.1, "node": "a"} + {"index":{}} + {"timestamp": 1516470094000, "temperature": 198, "voltage": 5.6, "node": "b"} + {"index":{}} + {"timestamp": 1516383694000, "temperature": 200, "voltage": 4.2, "node": "c"} + {"index":{}} + {"timestamp": 1516297294000, "temperature": 202, "voltage": 4.0, "node": "c"} + +--- +"get mapping": + - do: + indices.get_mapping: + index: sensor + - match: {sensor.mappings.properties.voltage_times_ten.type: long } + - match: + sensor.mappings.properties.voltage_times_ten.script.source: | + for (double v : doc['voltage']) { + emit((long)(v * params.multiplier)); + } + - match: {sensor.mappings.properties.voltage_times_ten.script.params: {multiplier: 10} } + - match: {sensor.mappings.properties.voltage_times_ten.script.lang: painless } + +--- +"fetch fields": + - do: + search: + index: sensor + body: + sort: timestamp + fields: + - voltage_times_ten + - voltage_times_ten_no_dv + - temperature_digits + - match: {hits.total.value: 6} + - match: {hits.hits.0.fields.voltage_times_ten: [40] } + - match: {hits.hits.0.fields.temperature_digits: [2, 0, 2] } + - match: {hits.hits.0.fields.voltage_times_ten: [40] } + - match: {hits.hits.0.fields.voltage_times_ten_no_dv: [40] } + - match: {hits.hits.1.fields.voltage_times_ten: [42] } + - match: {hits.hits.2.fields.voltage_times_ten: [56] } + - match: {hits.hits.3.fields.voltage_times_ten: [51] } + - match: {hits.hits.4.fields.voltage_times_ten: [58] } + - match: 
{hits.hits.5.fields.voltage_times_ten: [52] } + +--- +"docvalue_fields": + - do: + search: + index: sensor + body: + sort: timestamp + docvalue_fields: + - voltage_times_ten + - temperature_digits + - match: {hits.total.value: 6} + - match: {hits.hits.0.fields.voltage_times_ten: [40] } + - match: {hits.hits.0.fields.temperature_digits: [0, 2, 2] } + - match: {hits.hits.0.fields.voltage_times_ten: [40] } + - match: {hits.hits.1.fields.voltage_times_ten: [42] } + - match: {hits.hits.2.fields.voltage_times_ten: [56] } + - match: {hits.hits.3.fields.voltage_times_ten: [51] } + - match: {hits.hits.4.fields.voltage_times_ten: [58] } + - match: {hits.hits.5.fields.voltage_times_ten: [52] } + +--- +"terms agg": + - do: + search: + index: sensor + body: + aggs: + v10: + terms: + field: voltage_times_ten + - match: {hits.total.value: 6} + - match: {aggregations.v10.buckets.0.key: 40.0} + - match: {aggregations.v10.buckets.0.doc_count: 1} + - match: {aggregations.v10.buckets.1.key: 42.0} + - match: {aggregations.v10.buckets.1.doc_count: 1} + +--- +"term query": + - do: + search: + index: sensor + body: + query: + term: + voltage_times_ten: 58 + - match: {hits.total.value: 1} + - match: {hits.hits.0._source.voltage: 5.8} diff --git a/modules/runtime-fields-common/src/yamlRestTest/resources/rest-api-spec/test/runtime_fields/33_double_calculated_at_index.yml b/modules/runtime-fields-common/src/yamlRestTest/resources/rest-api-spec/test/runtime_fields/33_double_calculated_at_index.yml new file mode 100644 index 0000000000000..15a8022f3c1b1 --- /dev/null +++ b/modules/runtime-fields-common/src/yamlRestTest/resources/rest-api-spec/test/runtime_fields/33_double_calculated_at_index.yml @@ -0,0 +1,193 @@ +--- +setup: + - do: + indices.create: + index: sensor + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + timestamp: + type: date + temperature: + type: long + voltage: + type: double + node: + type: keyword + voltage_percent: + type: double + 
script: + source: | + for (double v : doc['voltage']) { + emit(v / params.max); + } + params: + max: 5.8 + voltage_percent_no_dv: + type: double + doc_values: false + script: + source: | + for (double v : doc['voltage']) { + emit(v / params.max); + } + params: + max: 5.8 + # Test fetching many values + voltage_sqrts: + type: double + script: + source: | + for (double voltage : doc['voltage']) { + double v = voltage; + while (v > 1.2) { + emit(v); + v = Math.sqrt(v); + } + } + + - do: + bulk: + index: sensor + refresh: true + body: | + {"index":{}} + {"timestamp": 1516729294000, "temperature": 200, "voltage": 5.2, "node": "a"} + {"index":{}} + {"timestamp": 1516642894000, "temperature": 201, "voltage": 5.8, "node": "b"} + {"index":{}} + {"timestamp": 1516556494000, "temperature": 202, "voltage": 5.1, "node": "a"} + {"index":{}} + {"timestamp": 1516470094000, "temperature": 198, "voltage": 5.6, "node": "b"} + {"index":{}} + {"timestamp": 1516383694000, "temperature": 200, "voltage": 4.2, "node": "c"} + {"index":{}} + {"timestamp": 1516297294000, "temperature": 202, "voltage": 4.0, "node": "c"} + +--- +"get mapping": + - do: + indices.get_mapping: + index: sensor + - match: {sensor.mappings.properties.voltage_percent.type: double } + - match: + sensor.mappings.properties.voltage_percent.script.source: | + for (double v : doc['voltage']) { + emit(v / params.max); + } + - match: {sensor.mappings.properties.voltage_percent.script.params: {max: 5.8} } + - match: {sensor.mappings.properties.voltage_percent.script.lang: painless } + +--- +"fetch fields": + - do: + search: + index: sensor + body: + sort: timestamp + fields: [voltage_percent, voltage_percent_no_dv, voltage_sqrts] + - match: {hits.total.value: 6} + - match: {hits.hits.0.fields.voltage_percent: [0.6896551724137931] } + # Scripts that emit multiple values are supported + - match: {hits.hits.0.fields.voltage_sqrts: [4.0, 2.0, 1.4142135623730951] } + - match: {hits.hits.1.fields.voltage_percent: 
[0.7241379310344828] } + - match: {hits.hits.1.fields.voltage_percent_no_dv: [0.7241379310344828] } + - match: {hits.hits.2.fields.voltage_percent: [0.9655172413793103] } + - match: {hits.hits.3.fields.voltage_percent: [0.8793103448275862] } + - match: {hits.hits.4.fields.voltage_percent: [1.0] } + - match: {hits.hits.5.fields.voltage_percent: [0.896551724137931] } + +--- +"docvalue_fields": + - do: + search: + index: sensor + body: + sort: timestamp + docvalue_fields: [voltage_percent, voltage_sqrts] + - match: {hits.total.value: 6} + - match: {hits.hits.0.fields.voltage_percent: [0.6896551724137931] } + # Scripts that emit multiple values are supported and their results are sorted + - match: {hits.hits.0.fields.voltage_sqrts: [1.4142135623730951, 2.0, 4.0] } + - match: {hits.hits.1.fields.voltage_percent: [0.7241379310344828] } + - match: {hits.hits.2.fields.voltage_percent: [0.9655172413793103] } + - match: {hits.hits.3.fields.voltage_percent: [0.8793103448275862] } + - match: {hits.hits.4.fields.voltage_percent: [1.0] } + - match: {hits.hits.5.fields.voltage_percent: [0.896551724137931] } + +--- +"terms agg": + - do: + search: + index: sensor + body: + aggs: + v10: + terms: + field: voltage_percent + - match: {hits.total.value: 6} + - match: {aggregations.v10.buckets.0.key: 0.6896551724137931} + - match: {aggregations.v10.buckets.0.doc_count: 1} + - match: {aggregations.v10.buckets.1.key: 0.7241379310344828} + - match: {aggregations.v10.buckets.1.doc_count: 1} + +--- +"range query": + - do: + search: + index: sensor + body: + query: + range: + voltage_percent: + lt: .7 + - match: {hits.total.value: 1} + - match: {hits.hits.0._source.voltage: 4.0} + + - do: + search: + index: sensor + body: + query: + range: + voltage_percent: + gt: 1 + - match: {hits.total.value: 0} + + - do: + search: + index: sensor + body: + query: + range: + voltage_percent: + gte: 1 + - match: {hits.total.value: 1} + - match: {hits.hits.0._source.voltage: 5.8} + + - do: + 
search: + index: sensor + body: + query: + range: + voltage_percent: + gte: .7 + lte: .8 + - match: {hits.total.value: 1} + - match: {hits.hits.0._source.voltage: 4.2} + +--- +"term query": + - do: + search: + index: sensor + body: + query: + term: + voltage_percent: 1.0 + - match: {hits.total.value: 1} + - match: {hits.hits.0._source.voltage: 5.8} diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentLeafReader.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentLeafReader.java new file mode 100644 index 0000000000000..2e4bb77a9764c --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentLeafReader.java @@ -0,0 +1,486 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.index.mapper; + +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DocValuesType; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.Fields; +import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.LeafMetaData; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.PointValues; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.StoredFieldVisitor; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.memory.MemoryIndex; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.Collections; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.function.Consumer; +import java.util.stream.Collectors; + +/** + * A {@link LeafReader} over a lucene document that exposes doc values and stored fields. + * Note that unlike lucene's {@link MemoryIndex} implementation, this holds no state and + * does not attempt to do any analysis on text fields. It also supports stored + * fields where MemoryIndex does not. It is used to back index-time scripts that + * reference field data and stored fields from a document that has not yet been + * indexed. 
+ */ +class DocumentLeafReader extends LeafReader { + + private final ParseContext.Document document; + private final Map> calculatedFields; + private final Set fieldPath = new LinkedHashSet<>(); + + DocumentLeafReader(ParseContext.Document document, Map> calculatedFields) { + this.document = document; + this.calculatedFields = calculatedFields; + } + + private void checkField(String field) { + if (calculatedFields.containsKey(field)) { + // this means that a mapper script is referring to another calculated field; + // in which case we need to execute that field first. We also check for loops here + if (fieldPath.add(field) == false) { + throw new IllegalArgumentException( + "Loop in field resolution detected: " + String.join("->", fieldPath) + "->" + field + ); + } + calculatedFields.get(field).accept(this.getContext()); + fieldPath.remove(field); + } + } + + @Override + public NumericDocValues getNumericDocValues(String field) throws IOException { + checkField(field); + List values = document.getFields().stream() + .filter(f -> Objects.equals(f.name(), field)) + .filter(f -> f.fieldType().docValuesType() == DocValuesType.NUMERIC) + .map(IndexableField::numericValue) + .sorted() + .collect(Collectors.toList()); + return numericDocValues(values); + } + + @Override + public BinaryDocValues getBinaryDocValues(String field) throws IOException { + checkField(field); + List values = document.getFields().stream() + .filter(f -> Objects.equals(f.name(), field)) + .filter(f -> f.fieldType().docValuesType() == DocValuesType.BINARY) + .map(IndexableField::binaryValue) + .sorted() + .collect(Collectors.toList()); + return binaryDocValues(values); + } + + @Override + public SortedDocValues getSortedDocValues(String field) throws IOException { + checkField(field); + List values = document.getFields().stream() + .filter(f -> Objects.equals(f.name(), field)) + .filter(f -> f.fieldType().docValuesType() == DocValuesType.SORTED) + .map(IndexableField::binaryValue) + .sorted() + 
.collect(Collectors.toList()); + return sortedDocValues(values); + } + + @Override + public SortedNumericDocValues getSortedNumericDocValues(String field) throws IOException { + checkField(field); + List values = document.getFields().stream() + .filter(f -> Objects.equals(f.name(), field)) + .filter(f -> f.fieldType().docValuesType() == DocValuesType.SORTED_NUMERIC) + .map(IndexableField::numericValue) + .sorted() + .collect(Collectors.toList()); + return sortedNumericDocValues(values); + } + + @Override + public SortedSetDocValues getSortedSetDocValues(String field) throws IOException { + List values = document.getFields().stream() + .filter(f -> Objects.equals(f.name(), field)) + .filter(f -> f.fieldType().docValuesType() == DocValuesType.SORTED_SET) + .map(IndexableField::binaryValue) + .sorted() + .collect(Collectors.toList()); + return sortedSetDocValues(values); + } + + @Override + public FieldInfos getFieldInfos() { + return new FieldInfos(new FieldInfo[0]); + } + + @Override + public void document(int docID, StoredFieldVisitor visitor) throws IOException { + List fields = document.getFields().stream() + .filter(f -> f.fieldType().stored()) + .collect(Collectors.toList()); + for (IndexableField field : fields) { + FieldInfo fieldInfo = fieldInfo(field.name()); + if (visitor.needsField(fieldInfo) != StoredFieldVisitor.Status.YES) { + continue; + } + if (field.numericValue() != null) { + Number v = field.numericValue(); + if (v instanceof Integer) { + visitor.intField(fieldInfo, v.intValue()); + } else if (v instanceof Long) { + visitor.longField(fieldInfo, v.longValue()); + } else if (v instanceof Float) { + visitor.floatField(fieldInfo, v.floatValue()); + } else if (v instanceof Double) { + visitor.doubleField(fieldInfo, v.doubleValue()); + } + } else if (field.stringValue() != null) { + visitor.stringField(fieldInfo, field.stringValue().getBytes(StandardCharsets.UTF_8)); + } else if (field.binaryValue() != null) { + // We can't just pass 
field.binaryValue().bytes here as there may be offset/length + // considerations + byte[] data = new byte[field.binaryValue().length]; + System.arraycopy(field.binaryValue().bytes, field.binaryValue().offset, data, 0, data.length); + visitor.binaryField(fieldInfo, data); + } + } + } + + @Override + public CacheHelper getCoreCacheHelper() { + throw new UnsupportedOperationException(); + } + + @Override + public Terms terms(String field) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public NumericDocValues getNormValues(String field) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public Bits getLiveDocs() { + throw new UnsupportedOperationException(); + } + + @Override + public PointValues getPointValues(String field) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public void checkIntegrity() throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public LeafMetaData getMetaData() { + throw new UnsupportedOperationException(); + } + + @Override + public Fields getTermVectors(int docID) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public int numDocs() { + throw new UnsupportedOperationException(); + } + + @Override + public int maxDoc() { + throw new UnsupportedOperationException(); + } + + @Override + protected void doClose() throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + public CacheHelper getReaderCacheHelper() { + throw new UnsupportedOperationException(); + } + + // Our StoredFieldsVisitor implementations only check the name of the passed-in + // FieldInfo, so that's the only value we need to set here. 
+ private static FieldInfo fieldInfo(String name) { + return new FieldInfo( + name, + 0, + false, + false, + false, + IndexOptions.NONE, + DocValuesType.NONE, + -1, + Collections.emptyMap(), + 0, + 0, + 0, + false + ); + } + + private static NumericDocValues numericDocValues(List values) { + if (values.size() == 0) { + return null; + } + DocIdSetIterator disi = DocIdSetIterator.all(1); + return new NumericDocValues() { + @Override + public long longValue() { + return values.get(0).longValue(); + } + + @Override + public boolean advanceExact(int target) throws IOException { + return disi.advance(target) == target; + } + + @Override + public int docID() { + return disi.docID(); + } + + @Override + public int nextDoc() throws IOException { + return disi.nextDoc(); + } + + @Override + public int advance(int target) throws IOException { + return disi.advance(target); + } + + @Override + public long cost() { + return disi.cost(); + } + }; + } + + private static SortedNumericDocValues sortedNumericDocValues(List values) { + if (values.size() == 0) { + return null; + } + DocIdSetIterator disi = DocIdSetIterator.all(1); + return new SortedNumericDocValues() { + + int i = -1; + + @Override + public long nextValue() { + i++; + return values.get(i).longValue(); + } + + @Override + public int docValueCount() { + return values.size(); + } + + @Override + public boolean advanceExact(int target) throws IOException { + i = -1; + return disi.advance(target) == target; + } + + @Override + public int docID() { + return disi.docID(); + } + + @Override + public int nextDoc() throws IOException { + i = -1; + return disi.nextDoc(); + } + + @Override + public int advance(int target) throws IOException { + i = -1; + return disi.advance(target); + } + + @Override + public long cost() { + return disi.cost(); + } + }; + } + + private static BinaryDocValues binaryDocValues(List values) { + if (values.size() == 0) { + return null; + } + DocIdSetIterator disi = DocIdSetIterator.all(1); + return 
new BinaryDocValues() { + @Override + public BytesRef binaryValue() { + return values.get(0); + } + + @Override + public boolean advanceExact(int target) throws IOException { + return disi.advance(target) == target; + } + + @Override + public int docID() { + return disi.docID(); + } + + @Override + public int nextDoc() throws IOException { + return disi.nextDoc(); + } + + @Override + public int advance(int target) throws IOException { + return disi.advance(target); + } + + @Override + public long cost() { + return disi.cost(); + } + }; + } + + private static SortedDocValues sortedDocValues(List values) { + if (values.size() == 0) { + return null; + } + DocIdSetIterator disi = DocIdSetIterator.all(1); + return new SortedDocValues() { + + @Override + public int ordValue() { + return 0; + } + + @Override + public BytesRef lookupOrd(int ord) { + return values.get(0); + } + + @Override + public int getValueCount() { + return values.size(); + } + + @Override + public boolean advanceExact(int target) throws IOException { + return disi.advance(target) == target; + } + + @Override + public int docID() { + return disi.docID(); + } + + @Override + public int nextDoc() throws IOException { + return disi.nextDoc(); + } + + @Override + public int advance(int target) throws IOException { + return disi.advance(target); + } + + @Override + public long cost() { + return disi.cost(); + } + }; + } + + private static SortedSetDocValues sortedSetDocValues(List values) { + if (values.size() == 0) { + return null; + } + DocIdSetIterator disi = DocIdSetIterator.all(1); + return new SortedSetDocValues() { + + int i = -1; + + @Override + public long nextOrd() { + i++; + if (i >= values.size()) { + return NO_MORE_ORDS; + } + return i; + } + + @Override + public BytesRef lookupOrd(long ord) { + return values.get((int)ord); + } + + @Override + public long getValueCount() { + return values.size(); + } + + @Override + public boolean advanceExact(int target) throws IOException { + i = -1; + return 
disi.advance(target) == target; + } + + @Override + public int docID() { + return disi.docID(); + } + + @Override + public int nextDoc() throws IOException { + i = -1; + return disi.nextDoc(); + } + + @Override + public int advance(int target) throws IOException { + i = -1; + return disi.advance(target); + } + + @Override + public long cost() { + return disi.cost(); + } + }; + } +} diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java index a10c2e09849f6..222a4392c97e1 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DocumentParser.java @@ -10,6 +10,7 @@ import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.Query; import org.elasticsearch.Version; import org.elasticsearch.common.Strings; @@ -21,14 +22,20 @@ import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentType; +import org.elasticsearch.index.fielddata.IndexFieldDataCache; import org.elasticsearch.index.query.SearchExecutionContext; +import org.elasticsearch.indices.breaker.NoneCircuitBreakerService; +import org.elasticsearch.search.lookup.SearchLookup; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; +import java.util.HashMap; import java.util.Iterator; import java.util.List; +import java.util.Map; +import java.util.function.Consumer; import java.util.function.Function; /** A parser for documents, given mappings from a DocumentMapper */ @@ -93,6 +100,7 @@ private static boolean containsDisabledObjectMapper(ObjectMapper objectMapper, S private static void internalParseDocument(RootObjectMapper root, MetadataFieldMapper[] 
metadataFieldsMappers, ParseContext context, XContentParser parser) throws IOException { + final boolean emptyDoc = isEmptyDoc(root, parser); for (MetadataFieldMapper metadataMapper : metadataFieldsMappers) { @@ -106,11 +114,46 @@ private static void internalParseDocument(RootObjectMapper root, MetadataFieldMa parseObjectOrNested(context, root); } + executeIndexTimeScripts(context); + for (MetadataFieldMapper metadataMapper : metadataFieldsMappers) { metadataMapper.postParse(context); } } + private static void executeIndexTimeScripts(ParseContext context) { + List indexTimeScriptMappers = context.mappingLookup().indexTimeScriptMappers(); + if (indexTimeScriptMappers.isEmpty()) { + return; + } + SearchLookup searchLookup = new SearchLookup( + context.mappingLookup().indexTimeLookup()::get, + (ft, lookup) -> ft.fielddataBuilder(context.indexSettings().getIndex().getName(), lookup).build( + new IndexFieldDataCache.None(), + new NoneCircuitBreakerService()) + ); + // field scripts can be called both by the loop at the end of this method and via + // the document reader, so to ensure that we don't run them multiple times we + // guard them with an 'executed' boolean + Map> fieldScripts = new HashMap<>(); + indexTimeScriptMappers.forEach(mapper -> fieldScripts.put(mapper.name(), new Consumer<>() { + boolean executed = false; + @Override + public void accept(LeafReaderContext leafReaderContext) { + if (executed == false) { + mapper.executeScript(searchLookup, leafReaderContext, 0, context); + executed = true; + } + } + })); + + // call the index script on all field mappers configured with one + DocumentLeafReader reader = new DocumentLeafReader(context.rootDoc(), fieldScripts); + for (Consumer script : fieldScripts.values()) { + script.accept(reader.getContext()); + } + } + private static void validateStart(XContentParser parser) throws IOException { // will result in START_OBJECT XContentParser.Token token = parser.nextToken(); diff --git 
a/server/src/main/java/org/elasticsearch/index/mapper/DoubleScriptFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/DoubleScriptFieldType.java index ca675d3b5c674..afb11daf766fd 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DoubleScriptFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DoubleScriptFieldType.java @@ -34,7 +34,7 @@ public final class DoubleScriptFieldType extends AbstractScriptFieldType { - private static final DoubleFieldScript.Factory PARSE_FROM_SOURCE + static final DoubleFieldScript.Factory PARSE_FROM_SOURCE = (field, params, lookup) -> (DoubleFieldScript.LeafFactory) ctx -> new DoubleFieldScript ( field, diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DynamicFieldsBuilder.java b/server/src/main/java/org/elasticsearch/index/mapper/DynamicFieldsBuilder.java index 2419219ed70dc..9c678449fcc38 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DynamicFieldsBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DynamicFieldsBuilder.java @@ -271,7 +271,12 @@ public void newDynamicStringField(ParseContext context, String name) throws IOEx @Override public void newDynamicLongField(ParseContext context, String name) throws IOException { createDynamicField( - new NumberFieldMapper.Builder(name, NumberFieldMapper.NumberType.LONG, context.indexSettings().getSettings()), context); + new NumberFieldMapper.Builder( + name, + NumberFieldMapper.NumberType.LONG, + null, + context.indexSettings().getSettings() + ), context); } @Override @@ -279,8 +284,11 @@ public void newDynamicDoubleField(ParseContext context, String name) throws IOEx // no templates are defined, we use float by default instead of double // since this is much more space-efficient and should be enough most of // the time - createDynamicField(new NumberFieldMapper.Builder(name, - NumberFieldMapper.NumberType.FLOAT, context.indexSettings().getSettings()), context); + createDynamicField(new 
NumberFieldMapper.Builder( + name, + NumberFieldMapper.NumberType.FLOAT, + null, + context.indexSettings().getSettings()), context); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java index 7c6914b28bde1..d2350f504a381 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java @@ -9,6 +9,7 @@ package org.elasticsearch.index.mapper; import org.apache.lucene.document.Field; +import org.apache.lucene.index.LeafReaderContext; import org.elasticsearch.Version; import org.elasticsearch.common.Explicit; import org.elasticsearch.common.TriFunction; @@ -25,6 +26,9 @@ import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.mapper.FieldNamesFieldMapper.FieldNamesFieldType; import org.elasticsearch.index.mapper.Mapper.TypeParser.ParserContext; +import org.elasticsearch.script.Script; +import org.elasticsearch.script.ScriptType; +import org.elasticsearch.search.lookup.SearchLookup; import java.io.IOException; import java.util.ArrayList; @@ -168,6 +172,25 @@ public void parse(ParseContext context) throws IOException { multiFields.parse(this, context); } + /** + * @return whether this field mapper uses a script to generate its values + */ + public boolean hasScript() { + return false; + } + + /** + * Execute the index-time script associated with this field mapper. 
+ * + * This method should only be called if {@link #hasScript()} has returned {@code true} + * @param searchLookup a SearchLookup to be passed to the script + * @param ctx a LeafReaderContext exposing values from an incoming document + * @param pc the ParseContext over the incoming document + */ + public void executeScript(SearchLookup searchLookup, LeafReaderContext ctx, int doc, ParseContext pc) { + throw new UnsupportedOperationException("FieldMapper " + name() + " does not have an index-time script"); + } + /** * Parse the field value and populate the fields on {@link ParseContext#doc()}. * @@ -509,6 +532,8 @@ public static final class Parameter implements Supplier { private MergeValidator mergeValidator; private T value; private boolean isSet; + private List> requires = new ArrayList<>(); + private List> precludes = new ArrayList<>(); /** * Creates a new Parameter @@ -639,10 +664,32 @@ public Parameter setMergeValidator(MergeValidator mergeValidator) { return this; } + public Parameter requiresParameters(Parameter... ps) { + this.requires.addAll(Arrays.asList(ps)); + return this; + } + + public Parameter precludesParameters(Parameter... 
ps) { + this.precludes.addAll(Arrays.asList(ps)); + return this; + } + private void validate() { if (validator != null) { validator.accept(getValue()); } + if (this.isConfigured()) { + for (Parameter p : requires) { + if (p.isConfigured() == false) { + throw new IllegalArgumentException("Field [" + name + "] requires field [" + p.name + "] to be configured"); + } + } + for (Parameter p : precludes) { + if (p.isConfigured()) { + throw new IllegalArgumentException("Field [" + p.name + "] cannot be set in conjunction with field [" + name + "]"); + } + } + } } private void init(FieldMapper toInit) { @@ -823,6 +870,32 @@ public static Parameter docValuesParam(Function i return Parameter.boolParam("doc_values", false, initializer, defaultValue); } + /** + * Defines a script parameter + * @param initializer retrieves the equivalent parameter from an existing FieldMapper for use in merges + * @return a script parameter + */ + public static FieldMapper.Parameter