diff --git a/docs/reference/search.asciidoc b/docs/reference/search.asciidoc index 51b199c19f77d..79d3c7a93fd26 100644 --- a/docs/reference/search.asciidoc +++ b/docs/reference/search.asciidoc @@ -105,3 +105,5 @@ include::search/percolate.asciidoc[] include::search/more-like-this.asciidoc[] +include::search/field-stats.asciidoc[] + diff --git a/docs/reference/search/field-stats.asciidoc b/docs/reference/search/field-stats.asciidoc new file mode 100644 index 0000000000000..b6cd5db496710 --- /dev/null +++ b/docs/reference/search/field-stats.asciidoc @@ -0,0 +1,170 @@ +[[search-field-stats]] +== Field stats API + +experimental[] + +The field stats api allows one to find statistical properties of a field without executing a search, but by +looking up measurements that are natively available in the Lucene index. This can be useful to explore a dataset which +you don't know much about. For example, this allows creating a histogram aggregation with meaningful intervals. + +The field stats api by default executes on all indices, but can execute on specific indices too. + +All indices: + +[source,js] +-------------------------------------------------- +curl -XGET "http://localhost:9200/_field_stats?fields=rating" +-------------------------------------------------- + +Specific indices: + +[source,js] +-------------------------------------------------- +curl -XGET "http://localhost:9200/index1,index2/_field_stats?fields=rating" +-------------------------------------------------- + +Supported request options: +* `fields` - A list of fields to compute stats for. +* `level` - Defines if field stats should be returned on a per index level or on a cluster wide level. Valid values are + `indices` and `cluster`. Defaults to `cluster`. + +==== Field statistics + +The field stats api is supported on string based, number based and date based fields and can return the following statistics per field: + +* `max_doc` - The total number of documents. 
+* `doc_count` - The number of documents that have at least one term for this field, or -1 if this measurement isn't available on one or more shards. +* `density` - The percentage of documents that have at least one value for this field. This is a derived statistic and is based on the `max_doc` and `doc_count`. +* `sum_doc_freq` - The sum of each term's document frequency in this field, or -1 if this measurement isn't available on one or more shards. + Document frequency is the number of documents containing a particular term. +* `sum_total_term_freq` - The sum of the term frequencies of all terms in this field across all documents, or -1 if this measurement isn't available on one or more shards. + Term frequency is the total number of occurrences of a term in a particular document and field. +* `min_value` - The lowest value in the field represented in a displayable form. +* `max_value` - The highest value in the field represented in a displayable form. + +Note that for all the mentioned statistics, documents marked as deleted aren't taken into account. The documents marked +as deleted are only taken into account when the segments these documents reside on are merged away. 
+ +==== Example + +[source,js] +-------------------------------------------------- +curl -XGET "http://localhost:9200/_field_stats?fields=rating,answer_count,creation_date,display_name" +-------------------------------------------------- + +[source,js] +-------------------------------------------------- +{ + "_shards": { + "total": 1, + "successful": 1, + "failed": 0 + }, + "indices": { + "_all": { <1> + "fields": { + "creation_date": { + "max_doc": 1326564, + "doc_count": 564633, + "density": 42, + "sum_doc_freq": 2258532, + "sum_total_term_freq": -1, + "min_value": "2008-08-01T16:37:51.513Z", + "max_value": "2013-06-02T03:23:11.593Z" + }, + "display_name": { + "max_doc": 1326564, + "doc_count": 126741, + "density": 9, + "sum_doc_freq": 166535, + "sum_total_term_freq": 166616, + "min_value": "0", + "max_value": "정혜선" + }, + "answer_count": { + "max_doc": 1326564, + "doc_count": 139885, + "density": 10, + "sum_doc_freq": 559540, + "sum_total_term_freq": -1, + "min_value": 0, + "max_value": 160 + }, + "rating": { + "max_doc": 1326564, + "doc_count": 437892, + "density": 33, + "sum_doc_freq": 1751568, + "sum_total_term_freq": -1, + "min_value": -14, + "max_value": 1277 + } + } + } + } +} +-------------------------------------------------- + +<1> The `_all` key indicates that it contains the field stats of all indices in the cluster. 
+ +With level set to `indices`: + +[source,js] +-------------------------------------------------- +curl -XGET "http://localhost:9200/_field_stats?fields=rating,answer_count,creation_date,display_name&level=indices" +-------------------------------------------------- + +[source,js] +-------------------------------------------------- +{ + "_shards": { + "total": 1, + "successful": 1, + "failed": 0 + }, + "indices": { + "stack": { <1> + "fields": { + "creation_date": { + "max_doc": 1326564, + "doc_count": 564633, + "density": 42, + "sum_doc_freq": 2258532, + "sum_total_term_freq": -1, + "min_value": "2008-08-01T16:37:51.513Z", + "max_value": "2013-06-02T03:23:11.593Z" + }, + "display_name": { + "max_doc": 1326564, + "doc_count": 126741, + "density": 9, + "sum_doc_freq": 166535, + "sum_total_term_freq": 166616, + "min_value": "0", + "max_value": "정혜선" + }, + "answer_count": { + "max_doc": 1326564, + "doc_count": 139885, + "density": 10, + "sum_doc_freq": 559540, + "sum_total_term_freq": -1, + "min_value": 0, + "max_value": 160 + }, + "rating": { + "max_doc": 1326564, + "doc_count": 437892, + "density": 33, + "sum_doc_freq": 1751568, + "sum_total_term_freq": -1, + "min_value": -14, + "max_value": 1277 + } + } + } + } +} +-------------------------------------------------- + +<1> The `stack` key means it contains all field stats for the `stack` index. 
\ No newline at end of file diff --git a/rest-api-spec/api/field_stats.json b/rest-api-spec/api/field_stats.json new file mode 100644 index 0000000000000..e3c5e6d45df3f --- /dev/null +++ b/rest-api-spec/api/field_stats.json @@ -0,0 +1,46 @@ +{ + "field_stats": { + "documentation": "http://www.elastic.co/guide/en/elasticsearch/reference/master/search-field-stats.html", + "methods": ["GET", "POST"], + "url": { + "path": "/_field_stats", + "paths": [ + "/_field_stats", + "/{index}/_field_stats" + ], + "parts": { + "index": { + "type" : "list", + "description" : "A comma-separated list of index names; use `_all` or empty string to perform the operation on all indices" + } + }, + "params": { + "fields": { + "type" : "list", + "description" : "A comma-separated list of fields to get field statistics for (min value, max value, and more)" + }, + "level": { + "type" : "enum", + "options" : ["indices", "cluster"], + "default" : "cluster", + "description" : "Defines if field stats should be returned on a per index level or on a cluster wide level" + }, + "ignore_unavailable": { + "type" : "boolean", + "description" : "Whether specified concrete indices should be ignored when unavailable (missing or closed)" + }, + "allow_no_indices": { + "type" : "boolean", + "description" : "Whether to ignore if a wildcard indices expression resolves into no concrete indices. (This includes `_all` string or when no indices have been specified)" + }, + "expand_wildcards": { + "type" : "enum", + "options" : ["open","closed","none","all"], + "default" : "open", + "description" : "Whether to expand wildcard expression to concrete indices that are open, closed or both." 
+ } + } + }, + "body": null + } +} diff --git a/rest-api-spec/test/field_stats/10_basics.yaml b/rest-api-spec/test/field_stats/10_basics.yaml new file mode 100644 index 0000000000000..61a575603daf4 --- /dev/null +++ b/rest-api-spec/test/field_stats/10_basics.yaml @@ -0,0 +1,52 @@ +--- +"Basic field stats": + - do: + index: + index: test_1 + type: test + id: id_1 + body: { foo: "bar", number: 123 } + + - do: + indices.refresh: {} + + - do: + field_stats: + index: test_1 + fields: [foo, number] + + - match: { indices._all.fields.foo.max_doc: 1 } + - match: { indices._all.fields.foo.doc_count: 1 } + - match: { indices._all.fields.foo.min_value: "bar" } + - match: { indices._all.fields.foo.max_value: "bar" } + - match: { indices._all.fields.number.max_doc: 1 } + - match: { indices._all.fields.number.doc_count: 1 } + - match: { indices._all.fields.number.min_value: 123 } + - match: { indices._all.fields.number.max_value: 123 } + +--- +"Basic field stats with level set to indices": + - do: + index: + index: test_1 + type: test + id: id_1 + body: { foo: "bar", number: 123 } + + - do: + indices.refresh: {} + + - do: + field_stats: + index: test_1 + fields: [foo, number] + level: indices + + - match: { indices.test_1.fields.foo.max_doc: 1 } + - match: { indices.test_1.fields.foo.doc_count: 1 } + - match: { indices.test_1.fields.foo.min_value: "bar" } + - match: { indices.test_1.fields.foo.max_value: "bar" } + - match: { indices.test_1.fields.number.max_doc: 1 } + - match: { indices.test_1.fields.number.doc_count: 1 } + - match: { indices.test_1.fields.number.min_value: 123 } + - match: { indices.test_1.fields.number.max_value: 123 } diff --git a/src/main/java/org/elasticsearch/action/ActionModule.java b/src/main/java/org/elasticsearch/action/ActionModule.java index 21df6223a28b0..49d841567b4f1 100644 --- a/src/main/java/org/elasticsearch/action/ActionModule.java +++ b/src/main/java/org/elasticsearch/action/ActionModule.java @@ -134,6 +134,8 @@ import 
org.elasticsearch.action.exists.TransportExistsAction; import org.elasticsearch.action.explain.ExplainAction; import org.elasticsearch.action.explain.TransportExplainAction; +import org.elasticsearch.action.fieldstats.FieldStatsAction; +import org.elasticsearch.action.fieldstats.TransportFieldStatsTransportAction; import org.elasticsearch.action.get.*; import org.elasticsearch.action.index.IndexAction; import org.elasticsearch.action.index.TransportIndexAction; @@ -312,6 +314,8 @@ protected void configure() { registerAction(GetIndexedScriptAction.INSTANCE, TransportGetIndexedScriptAction.class); registerAction(DeleteIndexedScriptAction.INSTANCE, TransportDeleteIndexedScriptAction.class); + registerAction(FieldStatsAction.INSTANCE, TransportFieldStatsTransportAction.class); + // register Name -> GenericAction Map that can be injected to instances. MapBinder actionsBinder = MapBinder.newMapBinder(binder(), String.class, GenericAction.class); diff --git a/src/main/java/org/elasticsearch/action/fieldstats/FieldStats.java b/src/main/java/org/elasticsearch/action/fieldstats/FieldStats.java new file mode 100644 index 0000000000000..ea2d4cca90d17 --- /dev/null +++ b/src/main/java/org/elasticsearch/action/fieldstats/FieldStats.java @@ -0,0 +1,455 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.action.fieldstats; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Streamable; +import org.elasticsearch.common.joda.FormatDateTimeFormatter; +import org.elasticsearch.common.joda.Joda; +import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentBuilderString; + +import java.io.IOException; + +public abstract class FieldStats implements Streamable, ToXContent { + + private byte type; /* wire discriminator, see read(StreamInput): 0 Long, 1 Float, 2 Double, 3 Text, 4 Date */ + private long maxDoc; + private long docCount; + private long sumDocFreq; + private long sumTotalTermFreq; + protected T minValue; + protected T maxValue; + + protected FieldStats() { + } + + protected FieldStats(int type, long maxDoc, long docCount, long sumDocFreq, long sumTotalTermFreq) { + this.type = (byte) type; + this.maxDoc = maxDoc; + this.docCount = docCount; + this.sumDocFreq = sumDocFreq; + this.sumTotalTermFreq = sumTotalTermFreq; + } + + byte getType() { + return type; + } + + /** + * @return the total number of documents. + * + * Note that, documents marked as deleted that haven't yet been merged away aren't taken into account. + */ + public long getMaxDoc() { + return maxDoc; + } + + /** + * @return the number of documents that have at least one term for this field, or -1 if this measurement isn't available. + * + * Note that, documents marked as deleted that haven't yet been merged away aren't taken into account. + */ + public long getDocCount() { + return docCount; + } + + /** + * @return The percentage of documents that have at least one value for this field. 
+ * + * This is a derived statistic and is based on: 'doc_count / max_doc' + * The value is truncated to a whole percentage by integer division; -1 is returned when doc_count is negative + * (unavailable) or max_doc is not positive. + */ + public int getDensity() { + if (docCount < 0 || maxDoc <= 0) { + return -1; + } + return (int) (docCount * 100 / maxDoc); + } + + /** + * @return the sum of each term's document frequency in this field, or -1 if this measurement isn't available. + * Document frequency is the number of documents containing a particular term. + * + * Note that, documents marked as deleted that haven't yet been merged away aren't taken into account. + */ + public long getSumDocFreq() { + return sumDocFreq; + } + + /** + * @return the sum of the term frequencies of all terms in this field across all documents, or -1 if this measurement + * isn't available. Term frequency is the total number of occurrences of a term in a particular document and field. + * + * Note that, documents marked as deleted that haven't yet been merged away aren't taken into account. + */ + public long getSumTotalTermFreq() { + return sumTotalTermFreq; + } + + /** + * @return the lowest value in the field represented as a string. + * + * Note that, documents marked as deleted that haven't yet been merged away aren't taken into account. + */ + public abstract String getMinValue(); + + /** + * @return the highest value in the field represented as a string. + * + * Note that, documents marked as deleted that haven't yet been merged away aren't taken into account. + */ + public abstract String getMaxValue(); + + /** + * Merges the provided stats into this stats instance. 
+ * + * max_doc is always summed. For doc_count, sum_doc_freq and sum_total_term_freq a value of -1 on either side + * makes the merged value -1; otherwise the two values are summed. Subclasses additionally merge min/max values. + */ + public void append(FieldStats stats) { + this.maxDoc += stats.maxDoc; + if (stats.docCount == -1) { + this.docCount = -1; + } else if (this.docCount != -1) { + this.docCount += stats.docCount; + } + if (stats.sumDocFreq == -1) { + this.sumDocFreq = -1; + } else if (this.sumDocFreq != -1) { + this.sumDocFreq += stats.sumDocFreq; + } + if (stats.sumTotalTermFreq == -1) { + this.sumTotalTermFreq = -1; + } else if (this.sumTotalTermFreq != -1) { + this.sumTotalTermFreq += stats.sumTotalTermFreq; + } + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field(Fields.MAX_DOC, maxDoc); + builder.field(Fields.DOC_COUNT, docCount); + builder.field(Fields.DENSITY, getDensity()); + builder.field(Fields.SUM_DOC_FREQ, sumDocFreq); + builder.field(Fields.SUM_TOTAL_TERM_FREQ, sumTotalTermFreq); + toInnerXContent(builder); + builder.endObject(); + return builder; + } + + protected void toInnerXContent(XContentBuilder builder) throws IOException { + builder.field(Fields.MIN_VALUE, minValue); + builder.field(Fields.MAX_VALUE, maxValue); + } + + @Override + public void readFrom(StreamInput in) throws IOException { /* the type byte emitted by writeTo is not read here; read(StreamInput) consumes it before calling this method */ + maxDoc = in.readVLong(); + docCount = in.readLong(); + sumDocFreq = in.readLong(); + sumTotalTermFreq = in.readLong(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeByte(type); + out.writeVLong(maxDoc); + out.writeLong(docCount); + out.writeLong(sumDocFreq); + out.writeLong(sumTotalTermFreq); + } + + public static class Long extends FieldStats { + + public Long() { + } + + public Long(long maxDoc, long docCount, long sumDocFreq, long sumTotalTermFreq, long minValue, long maxValue) { + this(0, maxDoc, docCount, sumDocFreq, sumTotalTermFreq, minValue, maxValue); + } + + protected Long(int type, long maxDoc, long docCount, long sumDocFreq, long sumTotalTermFreq, long minValue, long maxValue) { + super(type, maxDoc, docCount, 
sumDocFreq, sumTotalTermFreq); + this.minValue = minValue; + this.maxValue = maxValue; + } + + @Override + public String getMinValue() { + return String.valueOf(minValue.longValue()); + } + + @Override + public String getMaxValue() { + return String.valueOf(maxValue.longValue()); + } + + @Override + public void append(FieldStats stats) { + super.append(stats); + Long other = (Long) stats; + this.minValue = Math.min(other.minValue, minValue); + this.maxValue = Math.max(other.maxValue, maxValue); + } + + @Override + public void readFrom(StreamInput in) throws IOException { + super.readFrom(in); + minValue = in.readLong(); + maxValue = in.readLong(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeLong(minValue); + out.writeLong(maxValue); + } + + } + + public static final class Float extends FieldStats { + + public Float() { + } + + public Float(long maxDoc, long docCount, long sumDocFreq, long sumTotalTermFreq, float minValue, float maxValue) { + super(1, maxDoc, docCount, sumDocFreq, sumTotalTermFreq); + this.minValue = minValue; + this.maxValue = maxValue; + } + + @Override + public String getMinValue() { + return String.valueOf(minValue.floatValue()); + } + + @Override + public String getMaxValue() { + return String.valueOf(maxValue.floatValue()); + } + + @Override + public void append(FieldStats stats) { + super.append(stats); + Float other = (Float) stats; + this.minValue = Math.min(other.minValue, minValue); + this.maxValue = Math.max(other.maxValue, maxValue); + } + + @Override + public void readFrom(StreamInput in) throws IOException { + super.readFrom(in); + minValue = in.readFloat(); + maxValue = in.readFloat(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeFloat(minValue); + out.writeFloat(maxValue); + } + + } + + public static final class Double extends FieldStats { + + public Double() { + } + + public Double(long maxDoc, long 
docCount, long sumDocFreq, long sumTotalTermFreq, double minValue, double maxValue) { + super(2, maxDoc, docCount, sumDocFreq, sumTotalTermFreq); + this.minValue = minValue; + this.maxValue = maxValue; + } + + @Override + public String getMinValue() { + return String.valueOf(minValue.doubleValue()); + } + + @Override + public String getMaxValue() { + return String.valueOf(maxValue.doubleValue()); + } + + @Override + public void append(FieldStats stats) { + super.append(stats); + Double other = (Double) stats; + this.minValue = Math.min(other.minValue, minValue); + this.maxValue = Math.max(other.maxValue, maxValue); + } + + @Override + public void readFrom(StreamInput in) throws IOException { + super.readFrom(in); + minValue = in.readDouble(); + maxValue = in.readDouble(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeDouble(minValue); + out.writeDouble(maxValue); + } + + } + + public static final class Text extends FieldStats { + + public Text() { + } + + public Text(long maxDoc, long docCount, long sumDocFreq, long sumTotalTermFreq, BytesRef minValue, BytesRef maxValue) { + super(3, maxDoc, docCount, sumDocFreq, sumTotalTermFreq); + this.minValue = minValue; + this.maxValue = maxValue; + } + + @Override + public String getMinValue() { + return minValue.utf8ToString(); + } + + @Override + public String getMaxValue() { + return maxValue.utf8ToString(); + } + + @Override + public void append(FieldStats stats) { + super.append(stats); + Text other = (Text) stats; + if (other.minValue.compareTo(minValue) < 0) { + minValue = other.minValue; + } + if (other.maxValue.compareTo(maxValue) > 0) { + maxValue = other.maxValue; + } + } + + @Override + protected void toInnerXContent(XContentBuilder builder) throws IOException { + builder.field(Fields.MIN_VALUE, getMinValue()); + builder.field(Fields.MAX_VALUE, getMaxValue()); + } + + @Override + public void readFrom(StreamInput in) throws IOException { + 
super.readFrom(in); + minValue = in.readBytesRef(); + maxValue = in.readBytesRef(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeBytesRef(minValue); + out.writeBytesRef(maxValue); + } + + } + + public static final class Date extends Long { + + private FormatDateTimeFormatter dateFormatter; + + public Date() { + } + + public Date(long maxDoc, long docCount, long sumDocFreq, long sumTotalTermFreq, long minValue, long maxValue, FormatDateTimeFormatter dateFormatter) { + super(4, maxDoc, docCount, sumDocFreq, sumTotalTermFreq, minValue, maxValue); + this.dateFormatter = dateFormatter; + } + + @Override + public String getMinValue() { + return dateFormatter.printer().print(minValue); + } + + @Override + public String getMaxValue() { + return dateFormatter.printer().print(maxValue); + } + + @Override + protected void toInnerXContent(XContentBuilder builder) throws IOException { + builder.field(Fields.MIN_VALUE, getMinValue()); + builder.field(Fields.MAX_VALUE, getMaxValue()); + } + + @Override + public void readFrom(StreamInput in) throws IOException { + super.readFrom(in); + dateFormatter = Joda.forPattern(in.readString()); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeString(dateFormatter.format()); + } + + } + + public static FieldStats read(StreamInput in) throws IOException { + FieldStats stats; + byte type = in.readByte(); /* discriminator that writeTo emits first; selects the concrete subclass below */ + switch (type) { + case 0: + stats = new Long(); + break; + case 1: + stats = new Float(); + break; + case 2: + stats = new Double(); + break; + case 3: + stats = new Text(); + break; + case 4: + stats = new Date(); + break; + default: + throw new IllegalArgumentException("Illegal type [" + type + "]"); + } + stats.type = type; + stats.readFrom(in); + return stats; + } + + private final static class Fields { + + final static XContentBuilderString MAX_DOC = new XContentBuilderString("max_doc"); + final static 
XContentBuilderString DOC_COUNT = new XContentBuilderString("doc_count"); + final static XContentBuilderString DENSITY = new XContentBuilderString("density"); + final static XContentBuilderString SUM_DOC_FREQ = new XContentBuilderString("sum_doc_freq"); + final static XContentBuilderString SUM_TOTAL_TERM_FREQ = new XContentBuilderString("sum_total_term_freq"); + final static XContentBuilderString MIN_VALUE = new XContentBuilderString("min_value"); + final static XContentBuilderString MAX_VALUE = new XContentBuilderString("max_value"); + + } + +} diff --git a/src/main/java/org/elasticsearch/action/fieldstats/FieldStatsAction.java b/src/main/java/org/elasticsearch/action/fieldstats/FieldStatsAction.java new file mode 100644 index 0000000000000..fb4a3f7783340 --- /dev/null +++ b/src/main/java/org/elasticsearch/action/fieldstats/FieldStatsAction.java @@ -0,0 +1,45 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.action.fieldstats; + +import org.elasticsearch.action.ClientAction; +import org.elasticsearch.client.Client; + +/** + * Action descriptor for the field stats API. + * + * Exposes the singleton {@link #INSTANCE}, passes the action name {@link #NAME} to the superclass and creates the + * {@link FieldStatsResponse} and {@link FieldStatsRequestBuilder} objects used with this action. + */ +public class FieldStatsAction extends ClientAction { + + public static final FieldStatsAction INSTANCE = new FieldStatsAction(); + public static final String NAME = "indices:data/read/field_stats"; + + private FieldStatsAction() { + super(NAME); + } + + @Override + public FieldStatsResponse newResponse() { + return new FieldStatsResponse(); + } + + @Override + public FieldStatsRequestBuilder newRequestBuilder(Client client) { + return new FieldStatsRequestBuilder(client); + } +} diff --git a/src/main/java/org/elasticsearch/action/fieldstats/FieldStatsRequest.java b/src/main/java/org/elasticsearch/action/fieldstats/FieldStatsRequest.java new file mode 100644 index 0000000000000..ff61fe88ee94f --- /dev/null +++ b/src/main/java/org/elasticsearch/action/fieldstats/FieldStatsRequest.java @@ -0,0 +1,77 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.action.fieldstats; + +import org.elasticsearch.action.ActionRequestValidationException; +import org.elasticsearch.action.ValidateActions; +import org.elasticsearch.action.support.broadcast.BroadcastOperationRequest; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; + +import java.io.IOException; + +/** + * Broadcast request of the field stats API. + * + * Carries the names of the fields to compute statistics for and the level at which stats are reported. The level + * defaults to {@link #DEFAULT_LEVEL} and {@link #validate()} rejects any value other than {@code cluster} or + * {@code indices}. On the wire the fields array and the level string are appended after the parent request state. + * + * NOTE(review): {@link #validate()} only checks the level; a null or empty fields array is not rejected here - + * confirm that callers always set it. + */ +public class FieldStatsRequest extends BroadcastOperationRequest { + + public final static String DEFAULT_LEVEL = "cluster"; + + private String[] fields; + private String level = DEFAULT_LEVEL; + + public String[] fields() { + return fields; + } + + public void fields(String[] fields) { + this.fields = fields; + } + + public String level() { + return level; + } + + public void level(String level) { + this.level = level; + } + + @Override + public ActionRequestValidationException validate() { + ActionRequestValidationException validationException = super.validate(); + if ("cluster".equals(level) == false && "indices".equals(level) == false) { + validationException = ValidateActions.addValidationError("invalid level option [" + level + "]", validationException); + } + return validationException; + } + + @Override + public void readFrom(StreamInput in) throws IOException { + super.readFrom(in); + fields = in.readStringArray(); + level = in.readString(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeStringArrayNullable(fields); + out.writeString(level); + } +} diff --git a/src/main/java/org/elasticsearch/action/fieldstats/FieldStatsRequestBuilder.java b/src/main/java/org/elasticsearch/action/fieldstats/FieldStatsRequestBuilder.java new file mode 100644 index 0000000000000..4c8d0b6c73b11 --- /dev/null +++ b/src/main/java/org/elasticsearch/action/fieldstats/FieldStatsRequestBuilder.java @@ -0,0 +1,48 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.action.fieldstats; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.support.broadcast.BroadcastOperationRequestBuilder; +import org.elasticsearch.client.Client; + +/** + * Builder for a {@link FieldStatsRequest}. + * + * The constructor wraps a fresh request; each setter forwards its argument to that request and returns this builder + * so calls can be chained. {@link #doExecute} hands the request and listener to {@code client.fieldStats(...)}. + */ +public class FieldStatsRequestBuilder extends BroadcastOperationRequestBuilder { + + public FieldStatsRequestBuilder(Client client) { + super(client, new FieldStatsRequest()); + } + + public FieldStatsRequestBuilder setFields(String... fields) { + request().fields(fields); + return this; + } + + public FieldStatsRequestBuilder setLevel(String level) { + request().level(level); + return this; + } + + @Override + protected void doExecute(ActionListener listener) { + client.fieldStats(request, listener); + } +} diff --git a/src/main/java/org/elasticsearch/action/fieldstats/FieldStatsResponse.java b/src/main/java/org/elasticsearch/action/fieldstats/FieldStatsResponse.java new file mode 100644 index 0000000000000..e6f69e9791a25 --- /dev/null +++ b/src/main/java/org/elasticsearch/action/fieldstats/FieldStatsResponse.java @@ -0,0 +1,87 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. 
Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.action.fieldstats; + +import org.elasticsearch.action.ShardOperationFailedException; +import org.elasticsearch.action.support.broadcast.BroadcastOperationResponse; +import org.elasticsearch.common.Nullable; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Response of the field stats API. + * + * Holds merged field statistics in a two level map: the outer key is an index name (or {@code _all}, which is the + * key {@link #getAllFieldStats()} looks up and which may be absent), the inner key is a field name. + * + * Wire format after the parent response state: the number of outer entries, then for each entry its key, the number + * of fields, and for each field its name followed by the {@link FieldStats} payload, which is read back through + * {@link FieldStats#read(StreamInput)}. + */ +public class FieldStatsResponse extends BroadcastOperationResponse { + + private Map> indicesMergedFieldStats; + + public FieldStatsResponse() { + } + + public FieldStatsResponse(int totalShards, int successfulShards, int failedShards, List shardFailures, Map> indicesMergedFieldStats) { + super(totalShards, successfulShards, failedShards, shardFailures); + this.indicesMergedFieldStats = indicesMergedFieldStats; + } + + @Nullable + public Map getAllFieldStats() { + return indicesMergedFieldStats.get("_all"); + } + + public Map> getIndicesMergedFieldStats() { + return indicesMergedFieldStats; + } + + @Override + public void readFrom(StreamInput in) throws IOException { + super.readFrom(in); + final int size = in.readVInt(); + indicesMergedFieldStats = new HashMap<>(size); + for (int i = 0; i < size; i++) { + String key = in.readString(); + int indexSize = in.readVInt(); + Map indexFieldStats = new HashMap<>(indexSize); 
+ indicesMergedFieldStats.put(key, indexFieldStats); + for (int j = 0; j < indexSize; j++) { + key = in.readString(); + FieldStats value = FieldStats.read(in); + indexFieldStats.put(key, value); + } + } + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeVInt(indicesMergedFieldStats.size()); + for (Map.Entry> entry1 : indicesMergedFieldStats.entrySet()) { + out.writeString(entry1.getKey()); + out.writeVInt(entry1.getValue().size()); + for (Map.Entry entry2 : entry1.getValue().entrySet()) { + out.writeString(entry2.getKey()); + entry2.getValue().writeTo(out); + } + } + } +} diff --git a/src/main/java/org/elasticsearch/action/fieldstats/FieldStatsShardRequest.java b/src/main/java/org/elasticsearch/action/fieldstats/FieldStatsShardRequest.java new file mode 100644 index 0000000000000..fb46ff66d3b26 --- /dev/null +++ b/src/main/java/org/elasticsearch/action/fieldstats/FieldStatsShardRequest.java @@ -0,0 +1,59 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.action.fieldstats; + +import org.elasticsearch.action.support.broadcast.BroadcastShardOperationRequest; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.index.shard.ShardId; + +import java.io.IOException; + +/** + */ +public class FieldStatsShardRequest extends BroadcastShardOperationRequest { + + private String[] fields; + + public FieldStatsShardRequest() { + } + + public FieldStatsShardRequest(ShardId shardId, FieldStatsRequest request) { + super(shardId, request); + this.fields = request.fields(); + } + + public String[] getFields() { + return fields; + } + + @Override + public void readFrom(StreamInput in) throws IOException { + super.readFrom(in); + fields = in.readStringArray(); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeStringArrayNullable(fields); + } + +} diff --git a/src/main/java/org/elasticsearch/action/fieldstats/FieldStatsShardResponse.java b/src/main/java/org/elasticsearch/action/fieldstats/FieldStatsShardResponse.java new file mode 100644 index 0000000000000..ada4552e94cdf --- /dev/null +++ b/src/main/java/org/elasticsearch/action/fieldstats/FieldStatsShardResponse.java @@ -0,0 +1,71 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.action.fieldstats; + +import org.elasticsearch.action.support.broadcast.BroadcastShardOperationResponse; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.index.shard.ShardId; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +/** + */ +public class FieldStatsShardResponse extends BroadcastShardOperationResponse { + + private Map fieldStats; + + public FieldStatsShardResponse() { + } + + public FieldStatsShardResponse(ShardId shardId, Map fieldStats) { + super(shardId); + this.fieldStats = fieldStats; + } + + public Map getFieldStats() { + return fieldStats; + } + + + @Override + public void readFrom(StreamInput in) throws IOException { + super.readFrom(in); + final int size = in.readVInt(); + fieldStats = new HashMap<>(size); + for (int i = 0; i < size; i++) { + String key = in.readString(); + FieldStats value = FieldStats.read(in); + fieldStats.put(key, value); + } + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeVInt(fieldStats.size()); + for (Map.Entry entry : fieldStats.entrySet()) { + out.writeString(entry.getKey()); + entry.getValue().writeTo(out); + } + } +} diff --git a/src/main/java/org/elasticsearch/action/fieldstats/TransportFieldStatsTransportAction.java b/src/main/java/org/elasticsearch/action/fieldstats/TransportFieldStatsTransportAction.java new file mode 100644 index 0000000000000..a726c56e95709 --- /dev/null +++ 
b/src/main/java/org/elasticsearch/action/fieldstats/TransportFieldStatsTransportAction.java @@ -0,0 +1,184 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.action.fieldstats; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.MultiFields; +import org.apache.lucene.index.Terms; +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.ElasticsearchIllegalArgumentException; +import org.elasticsearch.ElasticsearchIllegalStateException; +import org.elasticsearch.ExceptionsHelper; +import org.elasticsearch.action.ShardOperationFailedException; +import org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.action.support.DefaultShardOperationFailedException; +import org.elasticsearch.action.support.broadcast.BroadcastShardOperationFailedException; +import org.elasticsearch.action.support.broadcast.TransportBroadcastOperationAction; +import org.elasticsearch.cluster.ClusterService; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.block.ClusterBlockException; +import org.elasticsearch.cluster.block.ClusterBlockLevel; +import org.elasticsearch.cluster.routing.GroupShardsIterator; +import 
org.elasticsearch.cluster.routing.ShardRouting; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.IndexService; +import org.elasticsearch.index.engine.Engine; +import org.elasticsearch.index.mapper.FieldMappers; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.shard.IndexShard; +import org.elasticsearch.index.shard.ShardId; +import org.elasticsearch.indices.IndicesService; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.TransportService; + +import java.io.IOException; +import java.util.*; +import java.util.concurrent.atomic.AtomicReferenceArray; + +public class TransportFieldStatsTransportAction extends TransportBroadcastOperationAction { + + private final IndicesService indicesService; + + @Inject + public TransportFieldStatsTransportAction(Settings settings, ThreadPool threadPool, ClusterService clusterService, TransportService transportService, ActionFilters actionFilters, IndicesService indicesService) { + super(settings, FieldStatsAction.NAME, threadPool, clusterService, transportService, actionFilters); + this.indicesService = indicesService; + } + + @Override + protected String executor() { + return ThreadPool.Names.MANAGEMENT; + } + + @Override + protected FieldStatsRequest newRequestInstance() { + return new FieldStatsRequest(); + } + + @Override + protected FieldStatsResponse newResponse(FieldStatsRequest request, AtomicReferenceArray shardsResponses, ClusterState clusterState) { + int successfulShards = 0; + int failedShards = 0; + Map> indicesMergedFieldStats = new HashMap<>(); + List shardFailures = new ArrayList<>(); + for (int i = 0; i < shardsResponses.length(); i++) { + Object shardValue = shardsResponses.get(i); + if (shardValue == null) { + // simply ignore non active shards + } else if (shardValue instanceof BroadcastShardOperationFailedException) { + failedShards++; + shardFailures.add(new 
DefaultShardOperationFailedException((BroadcastShardOperationFailedException) shardValue)); + } else { + successfulShards++; + FieldStatsShardResponse shardResponse = (FieldStatsShardResponse) shardValue; + + final String indexName; + if ("cluster".equals(request.level())) { + indexName = "_all"; + } else if ("indices".equals(request.level())) { + indexName = shardResponse.getIndex(); + } else { + // should already have been caught by the FieldStatsRequest#validate(...) + throw new ElasticsearchIllegalArgumentException("Illegal level option [" + request.level() + "]"); + } + + Map indexMergedFieldStats = indicesMergedFieldStats.get(indexName); + if (indexMergedFieldStats == null) { + indicesMergedFieldStats.put(indexName, indexMergedFieldStats = new HashMap<>()); + } + + Map fieldStats = shardResponse.getFieldStats(); + for (Map.Entry entry : fieldStats.entrySet()) { + FieldStats existing = indexMergedFieldStats.get(entry.getKey()); + if (existing != null) { + if (existing.getType() != entry.getValue().getType()) { + throw new ElasticsearchIllegalStateException( + "trying to merge the field stats of field [" + entry.getKey() + "] from index [" + shardResponse.getIndex() + "] but the field type is incompatible, try to set the 'level' option to 'indices'" + ); + } + + existing.append(entry.getValue()); + } else { + indexMergedFieldStats.put(entry.getKey(), entry.getValue()); + } + } + } + } + return new FieldStatsResponse(shardsResponses.length(), successfulShards, failedShards, shardFailures, indicesMergedFieldStats); + } + + @Override + protected FieldStatsShardRequest newShardRequest() { + return new FieldStatsShardRequest(); + } + + @Override + protected FieldStatsShardRequest newShardRequest(int numShards, ShardRouting shard, FieldStatsRequest request) { + return new FieldStatsShardRequest(shard.shardId(), request); + } + + @Override + protected FieldStatsShardResponse newShardResponse() { + return new FieldStatsShardResponse(); + } + + @Override + protected 
FieldStatsShardResponse shardOperation(FieldStatsShardRequest request) throws ElasticsearchException { + ShardId shardId = request.shardId(); + Map fieldStats = new HashMap<>(); + IndexService indexServices = indicesService.indexServiceSafe(shardId.getIndex()); + MapperService mapperService = indexServices.mapperService(); + IndexShard shard = indexServices.shardSafe(shardId.id()); + shard.readAllowed(); + try (Engine.Searcher searcher = shard.acquireSearcher("fieldstats")) { + for (String field : request.getFields()) { + FieldMappers fieldMappers = mapperService.fullName(field); + if (fieldMappers != null) { + IndexReader reader = searcher.reader(); + Terms terms = MultiFields.getTerms(reader, field); + if (terms != null) { + fieldStats.put(field, fieldMappers.mapper().stats(terms, reader.maxDoc())); + } + } else { + throw new IllegalArgumentException("field [" + field + "] doesn't exist"); + } + } + } catch (IOException e) { + throw ExceptionsHelper.convertToElastic(e); + } + return new FieldStatsShardResponse(shardId, fieldStats); + } + + @Override + protected GroupShardsIterator shards(ClusterState clusterState, FieldStatsRequest request, String[] concreteIndices) { + return clusterService.operationRouting().searchShards(clusterState, request.indices(), concreteIndices, null, null); + } + + @Override + protected ClusterBlockException checkGlobalBlock(ClusterState state, FieldStatsRequest request) { + return state.blocks().globalBlockedException(ClusterBlockLevel.READ); + } + + @Override + protected ClusterBlockException checkRequestBlock(ClusterState state, FieldStatsRequest request, String[] concreteIndices) { + return state.blocks().indicesBlockedException(ClusterBlockLevel.READ, concreteIndices); + } +} diff --git a/src/main/java/org/elasticsearch/client/Client.java b/src/main/java/org/elasticsearch/client/Client.java index 55b8a83428a19..7f763e50d35b7 100644 --- a/src/main/java/org/elasticsearch/client/Client.java +++ 
b/src/main/java/org/elasticsearch/client/Client.java @@ -38,6 +38,9 @@ import org.elasticsearch.action.explain.ExplainRequest; import org.elasticsearch.action.explain.ExplainRequestBuilder; import org.elasticsearch.action.explain.ExplainResponse; +import org.elasticsearch.action.fieldstats.FieldStatsRequest; +import org.elasticsearch.action.fieldstats.FieldStatsRequestBuilder; +import org.elasticsearch.action.fieldstats.FieldStatsResponse; import org.elasticsearch.action.get.*; import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.action.index.IndexRequestBuilder; @@ -682,6 +685,12 @@ public interface Client extends ElasticsearchClient, Releasable { */ void clearScroll(ClearScrollRequest request, ActionListener listener); + FieldStatsRequestBuilder prepareFieldStats(); + + ActionFuture fieldStats(FieldStatsRequest request); + + void fieldStats(FieldStatsRequest request, ActionListener listener); + /** * Returns this clients settings */ diff --git a/src/main/java/org/elasticsearch/client/support/AbstractClient.java b/src/main/java/org/elasticsearch/client/support/AbstractClient.java index 55f74434b0f2b..ccfab027fd4c8 100644 --- a/src/main/java/org/elasticsearch/client/support/AbstractClient.java +++ b/src/main/java/org/elasticsearch/client/support/AbstractClient.java @@ -44,6 +44,10 @@ import org.elasticsearch.action.explain.ExplainRequest; import org.elasticsearch.action.explain.ExplainRequestBuilder; import org.elasticsearch.action.explain.ExplainResponse; +import org.elasticsearch.action.fieldstats.FieldStatsAction; +import org.elasticsearch.action.fieldstats.FieldStatsRequest; +import org.elasticsearch.action.fieldstats.FieldStatsRequestBuilder; +import org.elasticsearch.action.fieldstats.FieldStatsResponse; import org.elasticsearch.action.get.*; import org.elasticsearch.action.index.IndexAction; import org.elasticsearch.action.index.IndexRequest; @@ -550,4 +554,19 @@ public ActionFuture clearScroll(ClearScrollRequest request) public 
ClearScrollRequestBuilder prepareClearScroll() { return new ClearScrollRequestBuilder(this); } + + @Override + public void fieldStats(FieldStatsRequest request, ActionListener listener) { + execute(FieldStatsAction.INSTANCE, request, listener); + } + + @Override + public ActionFuture fieldStats(FieldStatsRequest request) { + return execute(FieldStatsAction.INSTANCE, request); + } + + @Override + public FieldStatsRequestBuilder prepareFieldStats() { + return new FieldStatsRequestBuilder(this); + } } diff --git a/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java index a17a2852757d4..09b6b58137be6 100644 --- a/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java @@ -20,14 +20,15 @@ package org.elasticsearch.index.mapper; import com.google.common.base.Strings; - import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.FieldType; import org.apache.lucene.index.Term; +import org.apache.lucene.index.Terms; import org.apache.lucene.search.Filter; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.Query; import org.apache.lucene.util.BytesRef; +import org.elasticsearch.action.fieldstats.FieldStats; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.index.fielddata.FieldDataType; @@ -35,6 +36,7 @@ import org.elasticsearch.index.query.QueryParseContext; import org.elasticsearch.index.similarity.SimilarityProvider; +import java.io.IOException; import java.util.List; /** @@ -295,4 +297,9 @@ public static Loading parse(String loading, Loading defaultValue) { * */ public boolean isGenerated(); + /** + * @return a {@link FieldStats} instance that maps to the type of this field based on the provided {@link Terms} instance. 
+ */ + FieldStats stats(Terms terms, int maxDoc) throws IOException; + } diff --git a/src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java index a9ab088d285cf..7d8712ea3db90 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java @@ -38,10 +38,15 @@ import org.apache.lucene.search.RegexpQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermRangeQuery; +import org.apache.lucene.index.Terms; +import org.apache.lucene.queries.TermFilter; +import org.apache.lucene.queries.TermsFilter; +import org.apache.lucene.search.*; import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticsearchIllegalArgumentException; import org.elasticsearch.ElasticsearchIllegalStateException; import org.elasticsearch.Version; +import org.elasticsearch.action.fieldstats.FieldStats; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.collect.ImmutableOpenMap; import org.elasticsearch.common.lucene.BytesRefs; @@ -53,16 +58,7 @@ import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.fielddata.FieldDataType; -import org.elasticsearch.index.mapper.ContentPath; -import org.elasticsearch.index.mapper.FieldMapper; -import org.elasticsearch.index.mapper.FieldMapperListener; -import org.elasticsearch.index.mapper.FieldMappers; -import org.elasticsearch.index.mapper.Mapper; -import org.elasticsearch.index.mapper.MapperParsingException; -import org.elasticsearch.index.mapper.MergeContext; -import org.elasticsearch.index.mapper.MergeMappingException; -import org.elasticsearch.index.mapper.ObjectMapperListener; -import org.elasticsearch.index.mapper.ParseContext; +import org.elasticsearch.index.mapper.*; import 
org.elasticsearch.index.mapper.ParseContext.Document; import org.elasticsearch.index.mapper.internal.AllFieldMapper; import org.elasticsearch.index.mapper.object.ObjectMapper; @@ -1121,4 +1117,10 @@ public boolean isGenerated() { return false; } + @Override + public FieldStats stats(Terms terms, int maxDoc) throws IOException { + return new FieldStats.Text( + maxDoc, terms.getDocCount(), terms.getSumDocFreq(), terms.getSumTotalTermFreq(), terms.getMin(), terms.getMax() + ); + } } diff --git a/src/main/java/org/elasticsearch/index/mapper/core/ByteFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/ByteFieldMapper.java index 66d87a77aea08..58a419a6e42ff 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/ByteFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/ByteFieldMapper.java @@ -23,6 +23,7 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.Terms; import org.apache.lucene.search.Filter; import org.apache.lucene.search.NumericRangeQuery; import org.apache.lucene.search.Query; @@ -30,6 +31,7 @@ import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.NumericUtils; import org.elasticsearch.ElasticsearchIllegalArgumentException; +import org.elasticsearch.action.fieldstats.FieldStats; import org.elasticsearch.common.Explicit; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Strings; @@ -354,6 +356,15 @@ protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, } } + @Override + public FieldStats stats(Terms terms, int maxDoc) throws IOException { + long minValue = NumericUtils.getMinInt(terms); + long maxValue = NumericUtils.getMaxInt(terms); + return new FieldStats.Long( + maxDoc, terms.getDocCount(), terms.getSumDocFreq(), terms.getSumTotalTermFreq(), minValue, maxValue + ); + } + public static class CustomByteNumericField extends 
CustomNumericField { private final byte number; diff --git a/src/main/java/org/elasticsearch/index/mapper/core/DateFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/DateFieldMapper.java index 8e5c88a9636d3..3de8b803f413a 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/DateFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/DateFieldMapper.java @@ -23,6 +23,7 @@ import org.apache.lucene.document.FieldType; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Terms; import org.apache.lucene.search.Filter; import org.apache.lucene.search.NumericRangeQuery; import org.apache.lucene.search.Query; @@ -31,6 +32,7 @@ import org.apache.lucene.util.NumericUtils; import org.apache.lucene.util.ToStringUtils; import org.elasticsearch.ElasticsearchIllegalArgumentException; +import org.elasticsearch.action.fieldstats.FieldStats; import org.elasticsearch.common.Explicit; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Numbers; @@ -535,6 +537,15 @@ protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, } } + @Override + public FieldStats stats(Terms terms, int maxDoc) throws IOException { + long minValue = NumericUtils.getMinLong(terms); + long maxValue = NumericUtils.getMaxLong(terms); + return new FieldStats.Date( + maxDoc, terms.getDocCount(), terms.getSumDocFreq(), terms.getSumTotalTermFreq(), minValue, maxValue, dateTimeFormatter + ); + } + private long parseStringValue(String value) { try { return dateTimeFormatter.parser().parseMillis(value); diff --git a/src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java index 6f6058439bfa8..95c3546b0a496 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java @@ 
-27,6 +27,7 @@ import org.apache.lucene.document.FieldType; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.Terms; import org.apache.lucene.search.Filter; import org.apache.lucene.search.NumericRangeQuery; import org.apache.lucene.search.Query; @@ -34,6 +35,7 @@ import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.NumericUtils; import org.elasticsearch.ElasticsearchIllegalArgumentException; +import org.elasticsearch.action.fieldstats.FieldStats; import org.elasticsearch.common.Explicit; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Numbers; @@ -170,7 +172,7 @@ public Double value(Object value) { if (value instanceof BytesRef) { return Numbers.bytesToDouble((BytesRef) value); } - return Double.parseDouble(value.toString()); + return java.lang.Double.parseDouble(value.toString()); } @Override @@ -183,7 +185,7 @@ public BytesRef indexedValueForSearch(Object value) { @Override public Query fuzzyQuery(String value, Fuzziness fuzziness, int prefixLength, int maxExpansions, boolean transpositions) { - double iValue = Double.parseDouble(value); + double iValue = java.lang.Double.parseDouble(value); double iSim = fuzziness.asDouble(); return NumericRangeQuery.newDoubleRange(names.indexName(), precisionStep, iValue - iSim, @@ -254,13 +256,13 @@ protected void innerParseCreateField(ParseContext context, List fields) t } value = nullValue; } else { - value = Double.parseDouble(sExternalValue); + value = java.lang.Double.parseDouble(sExternalValue); } } else { value = ((Number) externalValue).doubleValue(); } if (context.includeInAll(includeInAll, this)) { - context.allEntries().addText(names.fullName(), Double.toString(value), boost); + context.allEntries().addText(names.fullName(), java.lang.Double.toString(value), boost); } } else { XContentParser parser = context.parser(); @@ -360,6 +362,15 @@ protected void doXContentBody(XContentBuilder builder, 
boolean includeDefaults, } + @Override + public FieldStats stats(Terms terms, int maxDoc) throws IOException { + double minValue = NumericUtils.sortableLongToDouble(NumericUtils.getMinLong(terms)); + double maxValue = NumericUtils.sortableLongToDouble(NumericUtils.getMaxLong(terms)); + return new FieldStats.Double( + maxDoc, terms.getDocCount(), terms.getSumDocFreq(), terms.getSumTotalTermFreq(), minValue, maxValue + ); + } + public static class CustomDoubleNumericField extends CustomNumericField { private final double number; @@ -382,7 +393,7 @@ public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) throws I @Override public String numericAsString() { - return Double.toString(number); + return java.lang.Double.toString(number); } } diff --git a/src/main/java/org/elasticsearch/index/mapper/core/FloatFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/FloatFieldMapper.java index ab1391e969834..05531e629f8d0 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/FloatFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/FloatFieldMapper.java @@ -27,6 +27,7 @@ import org.apache.lucene.document.FieldType; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.Terms; import org.apache.lucene.search.Filter; import org.apache.lucene.search.NumericRangeQuery; import org.apache.lucene.search.Query; @@ -34,6 +35,7 @@ import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.NumericUtils; import org.elasticsearch.ElasticsearchIllegalArgumentException; +import org.elasticsearch.action.fieldstats.FieldStats; import org.elasticsearch.common.Explicit; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Numbers; @@ -170,7 +172,7 @@ public Float value(Object value) { if (value instanceof BytesRef) { return Numbers.bytesToFloat((BytesRef) value); } - return Float.parseFloat(value.toString()); + return 
java.lang.Float.parseFloat(value.toString()); } @Override @@ -186,14 +188,14 @@ private float parseValue(Object value) { return ((Number) value).floatValue(); } if (value instanceof BytesRef) { - return Float.parseFloat(((BytesRef) value).utf8ToString()); + return java.lang.Float.parseFloat(((BytesRef) value).utf8ToString()); } - return Float.parseFloat(value.toString()); + return java.lang.Float.parseFloat(value.toString()); } @Override public Query fuzzyQuery(String value, Fuzziness fuzziness, int prefixLength, int maxExpansions, boolean transpositions) { - float iValue = Float.parseFloat(value); + float iValue = java.lang.Float.parseFloat(value); final float iSim = fuzziness.asFloat(); return NumericRangeQuery.newFloatRange(names.indexName(), precisionStep, iValue - iSim, @@ -260,13 +262,13 @@ protected void innerParseCreateField(ParseContext context, List fields) t } value = nullValue; } else { - value = Float.parseFloat(sExternalValue); + value = java.lang.Float.parseFloat(sExternalValue); } } else { value = ((Number) externalValue).floatValue(); } if (context.includeInAll(includeInAll, this)) { - context.allEntries().addText(names.fullName(), Float.toString(value), boost); + context.allEntries().addText(names.fullName(), java.lang.Float.toString(value), boost); } } else { XContentParser parser = context.parser(); @@ -367,6 +369,15 @@ protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, } + @Override + public FieldStats stats(Terms terms, int maxDoc) throws IOException { + float minValue = NumericUtils.sortableIntToFloat(NumericUtils.getMinInt(terms)); + float maxValue = NumericUtils.sortableIntToFloat(NumericUtils.getMaxInt(terms)); + return new FieldStats.Float( + maxDoc, terms.getDocCount(), terms.getSumDocFreq(), terms.getSumTotalTermFreq(), minValue, maxValue + ); + } + public static class CustomFloatNumericField extends CustomNumericField { private final float number; @@ -389,7 +400,7 @@ public TokenStream tokenStream(Analyzer 
analyzer, TokenStream previous) throws I @Override public String numericAsString() { - return Float.toString(number); + return java.lang.Float.toString(number); } } diff --git a/src/main/java/org/elasticsearch/index/mapper/core/IntegerFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/IntegerFieldMapper.java index eec2d84d0b9f5..f670a863dea91 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/IntegerFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/IntegerFieldMapper.java @@ -24,6 +24,7 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.Terms; import org.apache.lucene.search.Filter; import org.apache.lucene.search.NumericRangeQuery; import org.apache.lucene.search.Query; @@ -31,6 +32,7 @@ import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.NumericUtils; import org.elasticsearch.ElasticsearchIllegalArgumentException; +import org.elasticsearch.action.fieldstats.FieldStats; import org.elasticsearch.common.Explicit; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Numbers; @@ -357,6 +359,15 @@ protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, } + @Override + public FieldStats stats(Terms terms, int maxDoc) throws IOException { + long minValue = NumericUtils.getMinInt(terms); + long maxValue = NumericUtils.getMaxInt(terms); + return new FieldStats.Long( + maxDoc, terms.getDocCount(), terms.getSumDocFreq(), terms.getSumTotalTermFreq(), minValue, maxValue + ); + } + public static class CustomIntegerNumericField extends CustomNumericField { private final int number; diff --git a/src/main/java/org/elasticsearch/index/mapper/core/LongFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/LongFieldMapper.java index c10fdf79af679..d12e0ab07330d 100644 --- 
a/src/main/java/org/elasticsearch/index/mapper/core/LongFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/LongFieldMapper.java @@ -24,6 +24,7 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.Terms; import org.apache.lucene.search.Filter; import org.apache.lucene.search.NumericRangeQuery; import org.apache.lucene.search.Query; @@ -31,6 +32,7 @@ import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.NumericUtils; import org.elasticsearch.ElasticsearchIllegalArgumentException; +import org.elasticsearch.action.fieldstats.FieldStats; import org.elasticsearch.common.Explicit; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Numbers; @@ -165,7 +167,7 @@ public Long value(Object value) { if (value instanceof BytesRef) { return Numbers.bytesToLong((BytesRef) value); } - return Long.parseLong(value.toString()); + return java.lang.Long.parseLong(value.toString()); } @Override @@ -177,7 +179,7 @@ public BytesRef indexedValueForSearch(Object value) { @Override public Query fuzzyQuery(String value, Fuzziness fuzziness, int prefixLength, int maxExpansions, boolean transpositions) { - long iValue = Long.parseLong(value); + long iValue = java.lang.Long.parseLong(value); final long iSim = fuzziness.asLong(); return NumericRangeQuery.newLongRange(names.indexName(), precisionStep, iValue - iSim, @@ -244,13 +246,13 @@ protected void innerParseCreateField(ParseContext context, List fields) t } value = nullValue; } else { - value = Long.parseLong(sExternalValue); + value = java.lang.Long.parseLong(sExternalValue); } } else { value = ((Number) externalValue).longValue(); } if (context.includeInAll(includeInAll, this)) { - context.allEntries().addText(names.fullName(), Long.toString(value), boost); + context.allEntries().addText(names.fullName(), java.lang.Long.toString(value), boost); } } else { 
XContentParser parser = context.parser(); @@ -338,6 +340,15 @@ protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, } } + @Override + public FieldStats stats(Terms terms, int maxDoc) throws IOException { + long minValue = NumericUtils.getMinLong(terms); + long maxValue = NumericUtils.getMaxLong(terms); + return new FieldStats.Long( + maxDoc, terms.getDocCount(), terms.getSumDocFreq(), terms.getSumTotalTermFreq(), minValue, maxValue + ); + } + public static class CustomLongNumericField extends CustomNumericField { private final long number; @@ -360,7 +371,7 @@ public TokenStream tokenStream(Analyzer analyzer, TokenStream previous) throws I @Override public String numericAsString() { - return Long.toString(number); + return java.lang.Long.toString(number); } } } diff --git a/src/main/java/org/elasticsearch/index/mapper/core/ShortFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/ShortFieldMapper.java index b16518769d171..bcacc56773a50 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/ShortFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/ShortFieldMapper.java @@ -24,6 +24,7 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.index.Terms; import org.apache.lucene.search.Filter; import org.apache.lucene.search.NumericRangeQuery; import org.apache.lucene.search.Query; @@ -31,6 +32,7 @@ import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.NumericUtils; import org.elasticsearch.ElasticsearchIllegalArgumentException; +import org.elasticsearch.action.fieldstats.FieldStats; import org.elasticsearch.common.Explicit; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Numbers; @@ -355,6 +357,15 @@ protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, } + @Override + public FieldStats stats(Terms terms, int maxDoc) throws 
IOException { + long minValue = NumericUtils.getMinInt(terms); + long maxValue = NumericUtils.getMaxInt(terms); + return new FieldStats.Long( + maxDoc, terms.getDocCount(), terms.getSumDocFreq(), terms.getSumTotalTermFreq(), minValue, maxValue + ); + } + public static class CustomShortNumericField extends CustomNumericField { private final short number; diff --git a/src/main/java/org/elasticsearch/rest/action/RestActionModule.java b/src/main/java/org/elasticsearch/rest/action/RestActionModule.java index 9bc0d98905838..b8724b8a75b77 100644 --- a/src/main/java/org/elasticsearch/rest/action/RestActionModule.java +++ b/src/main/java/org/elasticsearch/rest/action/RestActionModule.java @@ -84,6 +84,7 @@ import org.elasticsearch.rest.action.delete.RestDeleteAction; import org.elasticsearch.rest.action.deletebyquery.RestDeleteByQueryAction; import org.elasticsearch.rest.action.explain.RestExplainAction; +import org.elasticsearch.rest.action.fieldstats.RestFieldStatsAction; import org.elasticsearch.rest.action.get.RestGetAction; import org.elasticsearch.rest.action.get.RestGetSourceAction; import org.elasticsearch.rest.action.get.RestHeadAction; @@ -229,6 +230,8 @@ protected void configure() { bind(RestDeleteIndexedScriptAction.class).asEagerSingleton(); + bind(RestFieldStatsAction.class).asEagerSingleton(); + // cat API Multibinder catActionMultibinder = Multibinder.newSetBinder(binder(), AbstractCatAction.class); catActionMultibinder.addBinding().to(RestAllocationAction.class).asEagerSingleton(); diff --git a/src/main/java/org/elasticsearch/rest/action/fieldstats/RestFieldStatsAction.java b/src/main/java/org/elasticsearch/rest/action/fieldstats/RestFieldStatsAction.java new file mode 100644 index 0000000000000..fd45c5a56d431 --- /dev/null +++ b/src/main/java/org/elasticsearch/rest/action/fieldstats/RestFieldStatsAction.java @@ -0,0 +1,84 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.rest.action.fieldstats; + +import org.elasticsearch.action.fieldstats.FieldStats; +import org.elasticsearch.action.fieldstats.FieldStatsRequest; +import org.elasticsearch.action.fieldstats.FieldStatsResponse; +import org.elasticsearch.action.support.IndicesOptions; +import org.elasticsearch.client.Client; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.rest.*; +import org.elasticsearch.rest.action.support.RestBuilderListener; + +import java.util.Map; + +import static org.elasticsearch.rest.RestRequest.Method.GET; +import static org.elasticsearch.rest.RestRequest.Method.POST; +import static org.elasticsearch.rest.action.support.RestActions.buildBroadcastShardsHeader; + +/** + */ +public class RestFieldStatsAction extends BaseRestHandler { + + @Inject + public RestFieldStatsAction(Settings settings, RestController controller, Client client) { + super(settings, controller, client); + controller.registerHandler(GET, "/_field_stats", this); + controller.registerHandler(POST, "/_field_stats", this); + controller.registerHandler(GET, "/{index}/_field_stats", this); + 
controller.registerHandler(POST, "/{index}/_field_stats", this); + } + + @Override + public void handleRequest(final RestRequest request, final RestChannel channel, final Client client) { + final FieldStatsRequest fieldStatsRequest = new FieldStatsRequest(); + fieldStatsRequest.indices(Strings.splitStringByCommaToArray(request.param("index"))); + fieldStatsRequest.indicesOptions(IndicesOptions.fromRequest(request, fieldStatsRequest.indicesOptions())); + fieldStatsRequest.fields(Strings.splitStringByCommaToArray(request.param("fields"))); + fieldStatsRequest.level(request.param("level", FieldStatsRequest.DEFAULT_LEVEL)); + fieldStatsRequest.listenerThreaded(false); + + client.fieldStats(fieldStatsRequest, new RestBuilderListener(channel) { + @Override + public RestResponse buildResponse(FieldStatsResponse response, XContentBuilder builder) throws Exception { + builder.startObject(); + buildBroadcastShardsHeader(builder, response); + + builder.startObject("indices"); + for (Map.Entry> entry1 : response.getIndicesMergedFieldStats().entrySet()) { + builder.startObject(entry1.getKey()); + builder.startObject("fields"); + for (Map.Entry entry2 : entry1.getValue().entrySet()) { + builder.field(entry2.getKey()); + entry2.getValue().toXContent(builder, request); + } + builder.endObject(); + builder.endObject(); + } + builder.endObject(); + return new BytesRestResponse(RestStatus.OK, builder); + } + }); + } +} \ No newline at end of file diff --git a/src/test/java/org/elasticsearch/fieldstats/FieldStatsIntegrationTests.java b/src/test/java/org/elasticsearch/fieldstats/FieldStatsIntegrationTests.java new file mode 100644 index 0000000000000..e31b60a238071 --- /dev/null +++ b/src/test/java/org/elasticsearch/fieldstats/FieldStatsIntegrationTests.java @@ -0,0 +1,214 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. 
Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.fieldstats; + +import org.elasticsearch.ElasticsearchIllegalStateException; +import org.elasticsearch.action.ActionRequestValidationException; +import org.elasticsearch.action.fieldstats.FieldStats; +import org.elasticsearch.action.fieldstats.FieldStatsResponse; +import org.elasticsearch.action.index.IndexRequestBuilder; +import org.elasticsearch.test.ElasticsearchIntegrationTest; + +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.ExecutionException; + +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAllSuccessful; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.nullValue; + +/** + */ +public class FieldStatsIntegrationTests extends ElasticsearchIntegrationTest { + + public void testRandom() throws Exception { + assertAcked(prepareCreate("test").addMapping( + "test", "string", "type=string", "date", "type=date", "double", "type=double", "double", "type=double", + "float", "type=float", "long", "type=long", "integer", "type=integer", "short", "type=short", "byte", "type=byte" + )); + + byte minByte = Byte.MAX_VALUE; + byte maxByte = Byte.MIN_VALUE; + short minShort = Short.MAX_VALUE; + short maxShort = Short.MIN_VALUE; 
+ int minInt = Integer.MAX_VALUE; + int maxInt = Integer.MIN_VALUE; + long minLong = Long.MAX_VALUE; + long maxLong = Long.MIN_VALUE; + float minFloat = Float.MAX_VALUE; + float maxFloat = Float.MIN_VALUE; + double minDouble = Double.MAX_VALUE; + double maxDouble = Double.MIN_VALUE; + String minString = new String(Character.toChars(1114111)); + String maxString = "0"; + + int numDocs = scaledRandomIntBetween(128, 1024); + List request = new ArrayList<>(numDocs); + for (int doc = 0; doc < numDocs; doc++) { + byte b = randomByte(); + minByte = (byte) Math.min(minByte, b); + maxByte = (byte) Math.max(maxByte, b); + short s = randomShort(); + minShort = (short) Math.min(minShort, s); + maxShort = (short) Math.max(maxShort, s); + int i = randomInt(); + minInt = Math.min(minInt, i); + maxInt = Math.max(maxInt, i); + long l = randomLong(); + minLong = Math.min(minLong, l); + maxLong = Math.max(maxLong, l); + float f = randomFloat(); + minFloat = Math.min(minFloat, f); + maxFloat = Math.max(maxFloat, f); + double d = randomDouble(); + minDouble = Math.min(minDouble, d); + maxDouble = Math.max(maxDouble, d); + String str = randomRealisticUnicodeOfLength(3); + if (str.compareTo(minString) < 0) { + minString = str; + } + if (str.compareTo(maxString) > 0) { + maxString = str; + } + + request.add(client().prepareIndex("test", "test", Integer.toString(doc)) + .setSource("byte", b, "short", s, "integer", i, "long", l, "float", f, "double", d, "string", str) + ); + } + indexRandom(true, false, request); + + FieldStatsResponse response = client().prepareFieldStats().setFields("byte", "short", "integer", "long", "float", "double", "string").get(); + assertAllSuccessful(response); + + for (FieldStats stats : response.getAllFieldStats().values()) { + assertThat(stats.getMaxDoc(), equalTo((long) numDocs)); + assertThat(stats.getDocCount(), equalTo((long) numDocs)); + assertThat(stats.getDensity(), equalTo(100)); + } + + assertThat(response.getAllFieldStats().get("byte").getMinValue(), 
equalTo(Byte.toString(minByte))); + assertThat(response.getAllFieldStats().get("byte").getMaxValue(), equalTo(Byte.toString(maxByte))); + assertThat(response.getAllFieldStats().get("short").getMinValue(), equalTo(Short.toString(minShort))); + assertThat(response.getAllFieldStats().get("short").getMaxValue(), equalTo(Short.toString(maxShort))); + assertThat(response.getAllFieldStats().get("integer").getMinValue(), equalTo(Integer.toString(minInt))); + assertThat(response.getAllFieldStats().get("integer").getMaxValue(), equalTo(Integer.toString(maxInt))); + assertThat(response.getAllFieldStats().get("long").getMinValue(), equalTo(Long.toString(minLong))); + assertThat(response.getAllFieldStats().get("long").getMaxValue(), equalTo(Long.toString(maxLong))); + assertThat(response.getAllFieldStats().get("float").getMinValue(), equalTo(Float.toString(minFloat))); + assertThat(response.getAllFieldStats().get("float").getMaxValue(), equalTo(Float.toString(maxFloat))); + assertThat(response.getAllFieldStats().get("double").getMinValue(), equalTo(Double.toString(minDouble))); + assertThat(response.getAllFieldStats().get("double").getMaxValue(), equalTo(Double.toString(maxDouble))); + } + + public void testFieldStatsIndexLevel() throws Exception { + assertAcked(prepareCreate("test1").addMapping( + "test", "value", "type=long" + )); + assertAcked(prepareCreate("test2").addMapping( + "test", "value", "type=long" + )); + assertAcked(prepareCreate("test3").addMapping( + "test", "value", "type=long" + )); + + indexRange("test1", -10, 100); + indexRange("test2", 101, 200); + indexRange("test3", 201, 300); + + // default: + FieldStatsResponse response = client().prepareFieldStats().setFields("value").get(); + assertAllSuccessful(response); + assertThat(response.getAllFieldStats().get("value").getMinValue(), equalTo(Long.toString(-10))); + assertThat(response.getAllFieldStats().get("value").getMaxValue(), equalTo(Long.toString(300))); + 
assertThat(response.getIndicesMergedFieldStats().size(), equalTo(1)); + assertThat(response.getIndicesMergedFieldStats().get("_all").get("value").getMinValue(), equalTo(Long.toString(-10))); + assertThat(response.getIndicesMergedFieldStats().get("_all").get("value").getMaxValue(), equalTo(Long.toString(300))); + + // Level: cluster + response = client().prepareFieldStats().setFields("value").setLevel("cluster").get(); + assertAllSuccessful(response); + assertThat(response.getAllFieldStats().get("value").getMinValue(), equalTo(Long.toString(-10))); + assertThat(response.getAllFieldStats().get("value").getMaxValue(), equalTo(Long.toString(300))); + assertThat(response.getIndicesMergedFieldStats().size(), equalTo(1)); + assertThat(response.getIndicesMergedFieldStats().get("_all").get("value").getMinValue(), equalTo(Long.toString(-10))); + assertThat(response.getIndicesMergedFieldStats().get("_all").get("value").getMaxValue(), equalTo(Long.toString(300))); + + // Level: indices + response = client().prepareFieldStats().setFields("value").setLevel("indices").get(); + assertAllSuccessful(response); + assertThat(response.getAllFieldStats(), nullValue()); + assertThat(response.getIndicesMergedFieldStats().size(), equalTo(3)); + assertThat(response.getIndicesMergedFieldStats().get("test1").get("value").getMinValue(), equalTo(Long.toString(-10))); + assertThat(response.getIndicesMergedFieldStats().get("test1").get("value").getMaxValue(), equalTo(Long.toString(100))); + assertThat(response.getIndicesMergedFieldStats().get("test2").get("value").getMinValue(), equalTo(Long.toString(101))); + assertThat(response.getIndicesMergedFieldStats().get("test2").get("value").getMaxValue(), equalTo(Long.toString(200))); + assertThat(response.getIndicesMergedFieldStats().get("test3").get("value").getMinValue(), equalTo(Long.toString(201))); + assertThat(response.getIndicesMergedFieldStats().get("test3").get("value").getMaxValue(), equalTo(Long.toString(300))); + + // Illegal level option: 
+ try { + client().prepareFieldStats().setFields("value").setLevel("illegal").get(); + fail(); + } catch (ActionRequestValidationException e) { + assertThat(e.getMessage(), equalTo("Validation Failed: 1: invalid level option [illegal];")); + } + } + + public void testIncompatibleFieldTypes() { + assertAcked(prepareCreate("test1").addMapping( + "test", "value", "type=long" + )); + assertAcked(prepareCreate("test2").addMapping( + "test", "value", "type=string" + )); + + client().prepareIndex("test1", "test").setSource("value", 1l).get(); + client().prepareIndex("test1", "test").setSource("value", 2l).get(); + client().prepareIndex("test2", "test").setSource("value", "a").get(); + client().prepareIndex("test2", "test").setSource("value", "b").get(); + refresh(); + + try { + client().prepareFieldStats().setFields("value").get(); + fail(); + } catch (ElasticsearchIllegalStateException e){ + assertThat(e.getMessage(), containsString("trying to merge the field stats of field [value]")); + } + + FieldStatsResponse response = client().prepareFieldStats().setFields("value").setLevel("indices").get(); + assertAllSuccessful(response); + assertThat(response.getIndicesMergedFieldStats().size(), equalTo(2)); + assertThat(response.getIndicesMergedFieldStats().get("test1").get("value").getMinValue(), equalTo(Long.toString(1))); + assertThat(response.getIndicesMergedFieldStats().get("test1").get("value").getMaxValue(), equalTo(Long.toString(2))); + assertThat(response.getIndicesMergedFieldStats().get("test2").get("value").getMinValue(), equalTo("a")); + assertThat(response.getIndicesMergedFieldStats().get("test2").get("value").getMaxValue(), equalTo("b")); + } + + private void indexRange(String index, long from, long to) throws ExecutionException, InterruptedException { + List requests = new ArrayList<>(); + for (long value = from; value <= to; value++) { + requests.add(client().prepareIndex(index, "test").setSource("value", value)); + } + indexRandom(true, false, requests); + } + 
+} diff --git a/src/test/java/org/elasticsearch/fieldstats/FieldStatsTests.java b/src/test/java/org/elasticsearch/fieldstats/FieldStatsTests.java new file mode 100644 index 0000000000000..cd5ececc18a03 --- /dev/null +++ b/src/test/java/org/elasticsearch/fieldstats/FieldStatsTests.java @@ -0,0 +1,194 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.fieldstats; + +import org.elasticsearch.action.fieldstats.FieldStats; +import org.elasticsearch.action.fieldstats.FieldStatsResponse; +import org.elasticsearch.common.settings.ImmutableSettings; +import org.elasticsearch.test.ElasticsearchSingleNodeTest; + +import java.util.ArrayList; +import java.util.List; +import java.util.Locale; + +import static org.hamcrest.Matchers.*; + +/** + */ +public class FieldStatsTests extends ElasticsearchSingleNodeTest { + + public void testByte() { + testNumberRange("field1", "byte", 12, 18); + testNumberRange("field1", "byte", -5, 5); + testNumberRange("field1", "byte", -18, -12); + } + + public void testShort() { + testNumberRange("field1", "short", 256, 266); + testNumberRange("field1", "short", -5, 5); + testNumberRange("field1", "short", -266, -256); + } + + public void testInteger() { + testNumberRange("field1", "integer", 56880, 56890); + testNumberRange("field1", "integer", -5, 5); + testNumberRange("field1", "integer", -56890, -56880); + } + + public void testLong() { + testNumberRange("field1", "long", 312321312312412l, 312321312312422l); + testNumberRange("field1", "long", -5, 5); + testNumberRange("field1", "long", -312321312312422l, -312321312312412l); + } + + public void testString() { + createIndex("test", ImmutableSettings.EMPTY, "field", "value", "type=string"); + for (int value = 0; value <= 10; value++) { + client().prepareIndex("test", "test").setSource("field", String.format(Locale.ENGLISH, "%03d", value)).get(); + } + client().admin().indices().prepareRefresh().get(); + + FieldStatsResponse result = client().prepareFieldStats().setFields("field").get(); + assertThat(result.getAllFieldStats().get("field").getMaxDoc(), equalTo(11l)); + assertThat(result.getAllFieldStats().get("field").getDocCount(), equalTo(11l)); + assertThat(result.getAllFieldStats().get("field").getDensity(), equalTo(100)); + assertThat(result.getAllFieldStats().get("field").getMinValue(), 
equalTo(String.format(Locale.ENGLISH, "%03d", 0))); + assertThat(result.getAllFieldStats().get("field").getMaxValue(), equalTo(String.format(Locale.ENGLISH, "%03d", 10))); + } + + public void testDouble() { + String fieldName = "field"; + createIndex("test", ImmutableSettings.EMPTY, fieldName, "value", "type=double"); + for (double value = -1; value <= 9; value++) { + client().prepareIndex("test", "test").setSource(fieldName, value).get(); + } + client().admin().indices().prepareRefresh().get(); + + FieldStatsResponse result = client().prepareFieldStats().setFields(fieldName).get(); + assertThat(result.getAllFieldStats().get(fieldName).getMaxDoc(), equalTo(11l)); + assertThat(result.getAllFieldStats().get(fieldName).getDocCount(), equalTo(11l)); + assertThat(result.getAllFieldStats().get(fieldName).getDensity(), equalTo(100)); + assertThat(result.getAllFieldStats().get(fieldName).getMinValue(), equalTo(Double.toString(-1))); + assertThat(result.getAllFieldStats().get(fieldName).getMaxValue(), equalTo(Double.toString(9))); + } + + public void testFloat() { + String fieldName = "field"; + createIndex("test", ImmutableSettings.EMPTY, fieldName, "value", "type=float"); + for (float value = -1; value <= 9; value++) { + client().prepareIndex("test", "test").setSource(fieldName, value).get(); + } + client().admin().indices().prepareRefresh().get(); + + FieldStatsResponse result = client().prepareFieldStats().setFields(fieldName).get(); + assertThat(result.getAllFieldStats().get(fieldName).getMaxDoc(), equalTo(11l)); + assertThat(result.getAllFieldStats().get(fieldName).getDocCount(), equalTo(11l)); + assertThat(result.getAllFieldStats().get(fieldName).getDensity(), equalTo(100)); + assertThat(result.getAllFieldStats().get(fieldName).getMinValue(), equalTo(Float.toString(-1))); + assertThat(result.getAllFieldStats().get(fieldName).getMaxValue(), equalTo(Float.toString(9))); + } + + private void testNumberRange(String fieldName, String fieldType, long min, long max) { + 
createIndex("test", ImmutableSettings.EMPTY, fieldName, "value", "type=" + fieldType); + for (long value = min; value <= max; value++) { + client().prepareIndex("test", "test").setSource(fieldName, value).get(); + } + client().admin().indices().prepareRefresh().get(); + + FieldStatsResponse result = client().prepareFieldStats().setFields(fieldName).get(); + long numDocs = max - min + 1; + assertThat(result.getAllFieldStats().get(fieldName).getMaxDoc(), equalTo(numDocs)); + assertThat(result.getAllFieldStats().get(fieldName).getDocCount(), equalTo(numDocs)); + assertThat(result.getAllFieldStats().get(fieldName).getDensity(), equalTo(100)); + assertThat(result.getAllFieldStats().get(fieldName).getMinValue(), equalTo(java.lang.Long.toString(min))); + assertThat(result.getAllFieldStats().get(fieldName).getMaxValue(), equalTo(java.lang.Long.toString(max))); + client().admin().indices().prepareDelete("test").get(); + } + + public void testMerge() { + List stats = new ArrayList<>(); + stats.add(new FieldStats.Long(1, 1l, 1l, 1l, 1l, 1l)); + stats.add(new FieldStats.Long(1, 1l, 1l, 1l, 1l, 1l)); + stats.add(new FieldStats.Long(1, 1l, 1l, 1l, 1l, 1l)); + + FieldStats stat = new FieldStats.Long(1, 1l, 1l, 1l, 1l, 1l); + for (FieldStats otherStat : stats) { + stat.append(otherStat); + } + assertThat(stat.getMaxDoc(), equalTo(4l)); + assertThat(stat.getDocCount(), equalTo(4l)); + assertThat(stat.getSumDocFreq(), equalTo(4l)); + assertThat(stat.getSumTotalTermFreq(), equalTo(4l)); + } + + public void testMerge_notAvailable() { + List stats = new ArrayList<>(); + stats.add(new FieldStats.Long(1, 1l, 1l, 1l, 1l, 1l)); + stats.add(new FieldStats.Long(1, 1l, 1l, 1l, 1l, 1l)); + stats.add(new FieldStats.Long(1, 1l, 1l, 1l, 1l, 1l)); + + FieldStats stat = new FieldStats.Long(1, -1l, -1l, -1l, 1l, 1l); + for (FieldStats otherStat : stats) { + stat.append(otherStat); + } + assertThat(stat.getMaxDoc(), equalTo(4l)); + assertThat(stat.getDocCount(), equalTo(-1l)); + 
assertThat(stat.getSumDocFreq(), equalTo(-1l)); + assertThat(stat.getSumTotalTermFreq(), equalTo(-1l)); + + stats.add(new FieldStats.Long(1, -1l, -1l, -1l, 1l, 1l)); + stat = stats.remove(0); + for (FieldStats otherStat : stats) { + stat.append(otherStat); + } + assertThat(stat.getMaxDoc(), equalTo(4l)); + assertThat(stat.getDocCount(), equalTo(-1l)); + assertThat(stat.getSumDocFreq(), equalTo(-1l)); + assertThat(stat.getSumTotalTermFreq(), equalTo(-1l)); + } + + public void testInvalidField() { + createIndex("test1", ImmutableSettings.EMPTY, "field1", "value", "type=string"); + client().prepareIndex("test1", "test").setSource("field1", "a").get(); + client().prepareIndex("test1", "test").setSource("field1", "b").get(); + + createIndex("test2", ImmutableSettings.EMPTY, "field2", "value", "type=string"); + client().prepareIndex("test2", "test").setSource("field2", "a").get(); + client().prepareIndex("test2", "test").setSource("field2", "b").get(); + client().admin().indices().prepareRefresh().get(); + + FieldStatsResponse result = client().prepareFieldStats().setFields("field1", "field2").get(); + assertThat(result.getFailedShards(), equalTo(2)); + assertThat(result.getTotalShards(), equalTo(2)); + assertThat(result.getSuccessfulShards(), equalTo(0)); + assertThat(result.getShardFailures()[0].reason(), either(containsString("field [field1] doesn't exist")).or(containsString("field [field2] doesn't exist"))); + assertThat(result.getIndicesMergedFieldStats().size(), equalTo(0)); + + // will only succeed on the 'test2' shard, because there the field does exist + result = client().prepareFieldStats().setFields("field1").get(); + assertThat(result.getFailedShards(), equalTo(1)); + assertThat(result.getTotalShards(), equalTo(2)); + assertThat(result.getSuccessfulShards(), equalTo(1)); + assertThat(result.getShardFailures()[0].reason(), either(containsString("field [field1] doesn't exist")).or(containsString("field [field2] doesn't exist"))); + 
assertThat(result.getIndicesMergedFieldStats().get("_all").get("field1").getMinValue(), equalTo("a")); + assertThat(result.getIndicesMergedFieldStats().get("_all").get("field1").getMaxValue(), equalTo("b")); + } + +} \ No newline at end of file