Skip to content

Commit

Permalink
Merge pull request #15017 from jimferenczi/fields_option
Browse files Browse the repository at this point in the history
Refuse to load fields from _source when using the `fields` option and support wildcards.
  • Loading branch information
jimczi committed Nov 30, 2015
2 parents 72be42d + 731833c commit e182072
Show file tree
Hide file tree
Showing 9 changed files with 95 additions and 149 deletions.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -19,30 +19,47 @@
package org.elasticsearch.index.fieldvisitor;

import org.apache.lucene.index.FieldInfo;
import org.elasticsearch.common.regex.Regex;

import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Set;

/**
* A field visitor that allows to load a selection of the stored fields.
* A field visitor that allows to load a selection of the stored fields by exact name or by pattern.
* Supported pattern styles: "xxx*", "*xxx", "*xxx*" and "xxx*yyy".
* The Uid field is always loaded.
* The class is optimized for source loading as it is a common use case.
*/
public class CustomFieldsVisitor extends FieldsVisitor {

private final Set<String> fields;
private final List<String> patterns;

public CustomFieldsVisitor(Set<String> fields, boolean loadSource) {
public CustomFieldsVisitor(Set<String> fields, List<String> patterns, boolean loadSource) {
super(loadSource);
this.fields = fields;
this.patterns = patterns;
}

public CustomFieldsVisitor(Set<String> fields, boolean loadSource) {
this(fields, Collections.emptyList(), loadSource);
}

@Override
public Status needsField(FieldInfo fieldInfo) throws IOException {
if (super.needsField(fieldInfo) == Status.YES) {
return Status.YES;
}

return fields.contains(fieldInfo.name) ? Status.YES : Status.NO;
if (fields.contains(fieldInfo.name)) {
return Status.YES;
}
for (String pattern : patterns) {
if (Regex.simpleMatch(pattern, fieldInfo.name)) {
return Status.YES;
}
}
return Status.NO;
}
}
120 changes: 29 additions & 91 deletions core/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,12 @@
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.text.StringAndBytesText;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.xcontent.XContentHelper;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.common.xcontent.support.XContentMapValues;
import org.elasticsearch.index.fieldvisitor.AllFieldsVisitor;
import org.elasticsearch.index.fieldvisitor.CustomFieldsVisitor;
import org.elasticsearch.index.fieldvisitor.FieldsVisitor;
import org.elasticsearch.index.mapper.DocumentMapper;
Expand All @@ -55,13 +55,7 @@
import org.elasticsearch.search.lookup.SourceLookup;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.*;

import static java.util.Collections.unmodifiableMap;
import static org.elasticsearch.common.xcontent.XContentFactory.contentBuilder;
Expand Down Expand Up @@ -98,9 +92,7 @@ public void preProcess(SearchContext context) {
public void execute(SearchContext context) {
FieldsVisitor fieldsVisitor;
Set<String> fieldNames = null;
List<String> extractFieldNames = null;

boolean loadAllStored = false;
List<String> fieldNamePatterns = null;
if (!context.hasFieldNames()) {
// no fields specified, default to return source if no explicit indication
if (!context.hasScriptFields() && !context.hasFetchSourceContext()) {
Expand All @@ -111,10 +103,6 @@ public void execute(SearchContext context) {
fieldsVisitor = new FieldsVisitor(context.sourceRequested());
} else {
for (String fieldName : context.fieldNames()) {
if (fieldName.equals("*")) {
loadAllStored = true;
continue;
}
if (fieldName.equals(SourceFieldMapper.NAME)) {
if (context.hasFetchSourceContext()) {
context.fetchSourceContext().fetchSource(true);
Expand All @@ -123,32 +111,28 @@ public void execute(SearchContext context) {
}
continue;
}
MappedFieldType fieldType = context.smartNameFieldType(fieldName);
if (fieldType == null) {
// Only fail if we know it is a object field, missing paths / fields shouldn't fail.
if (context.getObjectMapper(fieldName) != null) {
throw new IllegalArgumentException("field [" + fieldName + "] isn't a leaf field");
if (Regex.isSimpleMatchPattern(fieldName)) {
if (fieldNamePatterns == null) {
fieldNamePatterns = new ArrayList<>();
}
fieldNamePatterns.add(fieldName);
} else {
MappedFieldType fieldType = context.smartNameFieldType(fieldName);
if (fieldType == null) {
// Only fail if we know it is a object field, missing paths / fields shouldn't fail.
if (context.getObjectMapper(fieldName) != null) {
throw new IllegalArgumentException("field [" + fieldName + "] isn't a leaf field");
}
}
} else if (fieldType.stored()) {
if (fieldNames == null) {
fieldNames = new HashSet<>();
}
fieldNames.add(fieldType.names().indexName());
} else {
if (extractFieldNames == null) {
extractFieldNames = new ArrayList<>();
}
extractFieldNames.add(fieldName);
fieldNames.add(fieldName);
}
}
if (loadAllStored) {
fieldsVisitor = new AllFieldsVisitor(); // load everything, including _source
} else if (fieldNames != null) {
boolean loadSource = extractFieldNames != null || context.sourceRequested();
fieldsVisitor = new CustomFieldsVisitor(fieldNames, loadSource);
} else {
fieldsVisitor = new FieldsVisitor(extractFieldNames != null || context.sourceRequested());
}
boolean loadSource = context.sourceRequested();
fieldsVisitor = new CustomFieldsVisitor(fieldNames == null ? Collections.emptySet() : fieldNames,
fieldNamePatterns == null ? Collections.emptyList() : fieldNamePatterns, loadSource);
}

InternalSearchHit[] hits = new InternalSearchHit[context.docIdsToLoadSize()];
Expand All @@ -163,9 +147,9 @@ public void execute(SearchContext context) {
try {
int rootDocId = findRootDocumentIfNested(context, subReaderContext, subDocId);
if (rootDocId != -1) {
searchHit = createNestedSearchHit(context, docId, subDocId, rootDocId, extractFieldNames, loadAllStored, fieldNames, subReaderContext);
searchHit = createNestedSearchHit(context, docId, subDocId, rootDocId, fieldNames, fieldNamePatterns, subReaderContext);
} else {
searchHit = createSearchHit(context, fieldsVisitor, docId, subDocId, extractFieldNames, subReaderContext);
searchHit = createSearchHit(context, fieldsVisitor, docId, subDocId, subReaderContext);
}
} catch (IOException e) {
throw ExceptionsHelper.convertToElastic(e);
Expand Down Expand Up @@ -199,7 +183,7 @@ private int findRootDocumentIfNested(SearchContext context, LeafReaderContext su
return -1;
}

private InternalSearchHit createSearchHit(SearchContext context, FieldsVisitor fieldsVisitor, int docId, int subDocId, List<String> extractFieldNames, LeafReaderContext subReaderContext) {
private InternalSearchHit createSearchHit(SearchContext context, FieldsVisitor fieldsVisitor, int docId, int subDocId, LeafReaderContext subReaderContext) {
loadStoredFields(context, subReaderContext, fieldsVisitor, subDocId);
fieldsVisitor.postProcess(context.mapperService());

Expand All @@ -219,45 +203,24 @@ private InternalSearchHit createSearchHit(SearchContext context, FieldsVisitor f
typeText = documentMapper.typeText();
}
InternalSearchHit searchHit = new InternalSearchHit(docId, fieldsVisitor.uid().id(), typeText, searchFields);

// go over and extract fields that are not mapped / stored
// Set _source if requested.
SourceLookup sourceLookup = context.lookup().source();
sourceLookup.setSegmentAndDocument(subReaderContext, subDocId);
if (fieldsVisitor.source() != null) {
sourceLookup.setSource(fieldsVisitor.source());
}
if (extractFieldNames != null) {
for (String extractFieldName : extractFieldNames) {
List<Object> values = context.lookup().source().extractRawValues(extractFieldName);
if (!values.isEmpty()) {
if (searchHit.fieldsOrNull() == null) {
searchHit.fields(new HashMap<String, SearchHitField>(2));
}

SearchHitField hitField = searchHit.fields().get(extractFieldName);
if (hitField == null) {
hitField = new InternalSearchHitField(extractFieldName, new ArrayList<>(2));
searchHit.fields().put(extractFieldName, hitField);
}
for (Object value : values) {
hitField.values().add(value);
}
}
}
}

return searchHit;
}

private InternalSearchHit createNestedSearchHit(SearchContext context, int nestedTopDocId, int nestedSubDocId, int rootSubDocId, List<String> extractFieldNames, boolean loadAllStored, Set<String> fieldNames, LeafReaderContext subReaderContext) throws IOException {
private InternalSearchHit createNestedSearchHit(SearchContext context, int nestedTopDocId, int nestedSubDocId, int rootSubDocId, Set<String> fieldNames, List<String> fieldNamePatterns, LeafReaderContext subReaderContext) throws IOException {
// Also if highlighting is requested on nested documents we need to fetch the _source from the root document,
// otherwise highlighting will attempt to fetch the _source from the nested doc, which will fail,
// because the entire _source is only stored with the root document.
final FieldsVisitor rootFieldsVisitor = new FieldsVisitor(context.sourceRequested() || extractFieldNames != null || context.highlight() != null);
final FieldsVisitor rootFieldsVisitor = new FieldsVisitor(context.sourceRequested() || context.highlight() != null);
loadStoredFields(context, subReaderContext, rootFieldsVisitor, rootSubDocId);
rootFieldsVisitor.postProcess(context.mapperService());

Map<String, SearchHitField> searchFields = getSearchFields(context, nestedSubDocId, loadAllStored, fieldNames, subReaderContext);
Map<String, SearchHitField> searchFields = getSearchFields(context, nestedSubDocId, fieldNames, fieldNamePatterns, subReaderContext);
DocumentMapper documentMapper = context.mapperService().documentMapper(rootFieldsVisitor.uid().type());
SourceLookup sourceLookup = context.lookup().source();
sourceLookup.setSegmentAndDocument(subReaderContext, nestedSubDocId);
Expand Down Expand Up @@ -299,39 +262,14 @@ private InternalSearchHit createNestedSearchHit(SearchContext context, int neste
}

InternalSearchHit searchHit = new InternalSearchHit(nestedTopDocId, rootFieldsVisitor.uid().id(), documentMapper.typeText(), nestedIdentity, searchFields);
if (extractFieldNames != null) {
for (String extractFieldName : extractFieldNames) {
List<Object> values = context.lookup().source().extractRawValues(extractFieldName);
if (!values.isEmpty()) {
if (searchHit.fieldsOrNull() == null) {
searchHit.fields(new HashMap<String, SearchHitField>(2));
}

SearchHitField hitField = searchHit.fields().get(extractFieldName);
if (hitField == null) {
hitField = new InternalSearchHitField(extractFieldName, new ArrayList<>(2));
searchHit.fields().put(extractFieldName, hitField);
}
for (Object value : values) {
hitField.values().add(value);
}
}
}
}

return searchHit;
}

private Map<String, SearchHitField> getSearchFields(SearchContext context, int nestedSubDocId, boolean loadAllStored, Set<String> fieldNames, LeafReaderContext subReaderContext) {
private Map<String, SearchHitField> getSearchFields(SearchContext context, int nestedSubDocId, Set<String> fieldNames, List<String> fieldNamePatterns, LeafReaderContext subReaderContext) {
Map<String, SearchHitField> searchFields = null;
if (context.hasFieldNames() && !context.fieldNames().isEmpty()) {
FieldsVisitor nestedFieldsVisitor = null;
if (loadAllStored) {
nestedFieldsVisitor = new AllFieldsVisitor();
} else if (fieldNames != null) {
nestedFieldsVisitor = new CustomFieldsVisitor(fieldNames, false);
}

FieldsVisitor nestedFieldsVisitor = new CustomFieldsVisitor(fieldNames == null ? Collections.emptySet() : fieldNames,
fieldNamePatterns == null ? Collections.emptyList() : fieldNamePatterns, false);
if (nestedFieldsVisitor != null) {
loadStoredFields(context, subReaderContext, nestedFieldsVisitor, nestedSubDocId);
nestedFieldsVisitor.postProcess(context.mapperService());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,8 @@ public void testSimpleIndexTemplateTests() throws Exception {

assertHitCount(searchResponse, 1);
assertThat(searchResponse.getHits().getAt(0).field("field1").value().toString(), equalTo("value1"));
assertThat(searchResponse.getHits().getAt(0).field("field2").value().toString(), equalTo("value 2")); // this will still be loaded because of the source feature
// field2 is not stored.
assertThat(searchResponse.getHits().getAt(0).field("field2"), nullValue());

client().prepareIndex("text_index", "type1", "1").setSource("field1", "value1", "field2", "value 2").setRefresh(true).execute().actionGet();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ public void setupSuiteScopeCluster() throws Exception {
.endObject()));
}
assertAcked(prepareCreate(HIGH_CARD_IDX_NAME).setSettings(Settings.builder().put("number_of_shards", 2))
.addMapping("type", SINGLE_VALUED_FIELD_NAME, "type=geo_point", MULTI_VALUED_FIELD_NAME, "type=geo_point", NUMBER_FIELD_NAME, "type=long", "tag", "type=string,index=not_analyzed"));
.addMapping("type", SINGLE_VALUED_FIELD_NAME, "type=geo_point", MULTI_VALUED_FIELD_NAME, "type=geo_point", NUMBER_FIELD_NAME, "type=long,store=true", "tag", "type=string,index=not_analyzed"));

for (int i = 0; i < 2000; i++) {
singleVal = singleValues[i % numUniqueGeoPoints];
Expand Down Expand Up @@ -196,8 +196,8 @@ public void setupSuiteScopeCluster() throws Exception {
SearchHitField hitField = searchHit.field(NUMBER_FIELD_NAME);

assertThat("Hit " + i + " has wrong number of values", hitField.getValues().size(), equalTo(1));
Integer value = hitField.getValue();
assertThat("Hit " + i + " has wrong value", value, equalTo(i));
Long value = hitField.getValue();
assertThat("Hit " + i + " has wrong value", value.intValue(), equalTo(i));
}
assertThat(totalHits, equalTo(2000l));
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -532,8 +532,8 @@ public void testFetchFeatures() {
topHits("hits").setSize(1)
.highlighter(new HighlightBuilder().field("text"))
.setExplain(true)
.addFieldDataField("field1")
.addField("text")
.addFieldDataField("field1")
.addScriptField("script", new Script("5", ScriptService.ScriptType.INLINE, MockScriptEngine.NAME, Collections.emptyMap()))
.setFetchSource("text", null)
.setVersion(true)
Expand Down Expand Up @@ -569,8 +569,7 @@ public void testFetchFeatures() {
SearchHitField field = hit.field("field1");
assertThat(field.getValue().toString(), equalTo("5"));

field = hit.field("text");
assertThat(field.getValue().toString(), equalTo("some text to entertain"));
assertThat(hit.getSource().get("text").toString(), equalTo("some text to entertain"));

field = hit.field("script");
assertThat(field.getValue().toString(), equalTo("5"));
Expand Down
4 changes: 4 additions & 0 deletions docs/reference/migration/migrate_3_0.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -468,3 +468,7 @@ response is output by default.
Finally, the API for org.elasticsearch.monitor.os.OsStats has changed. The `getLoadAverage` method has been removed. The
value for this can now be obtained from `OsStats.Cpu#getLoadAverage`. Additionally, the recent CPU usage can be obtained
from `OsStats.Cpu#getPercent`.

=== Fields option
Only stored fields are retrievable with this option.
The fields option won't be able to load non stored fields from _source anymore.
Loading

0 comments on commit e182072

Please sign in to comment.