Skip to content

Commit

Permalink
Fix Term Vectors with artificial docs and keyword fields (elastic#53504)
Browse files Browse the repository at this point in the history
Previously, Term Vectors API was returning empty results for
artificial documents with keyword fields. Checking only for `string()`
on `IndexableField` is not enough, since for `KeywordFieldType`
`binaryValue()` must be used instead.

Fixes elastic#53494
  • Loading branch information
matriv authored Mar 13, 2020
1 parent 6f1fdcf commit 1fc3fe3
Show file tree
Hide file tree
Showing 8 changed files with 43 additions and 29 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -120,24 +120,6 @@ public IndexableField[] getFields(String name) {
return f.toArray(new IndexableField[f.size()]);
}

/**
* Returns an array of values of the field specified as the method parameter.
* This method returns an empty array when there are no
* matching fields. It never returns null.
* If you want the actual numeric field instances back, use {@link #getFields}.
* @param name the name of the field
* @return a <code>String[]</code> of field values
*/
public final String[] getValues(String name) {
List<String> result = new ArrayList<>();
for (IndexableField field : fields) {
if (field.name().equals(name) && field.stringValue() != null) {
result.add(field.stringValue());
}
}
return result.toArray(new String[result.size()]);
}

public IndexableField getField(String name) {
for (IndexableField field : fields) {
if (field.name().equals(name)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
import org.elasticsearch.index.get.GetResult;
import org.elasticsearch.index.mapper.DocumentMapperForType;
import org.elasticsearch.index.mapper.IdFieldMapper;
import org.elasticsearch.index.mapper.KeywordFieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.ParseContext;
Expand All @@ -56,6 +57,7 @@
import org.elasticsearch.search.dfs.AggregatedDfs;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
Expand Down Expand Up @@ -313,14 +315,33 @@ private static Fields generateTermVectorsFromDoc(IndexShard indexShard, TermVect
else {
seenFields.add(field.name());
}
String[] values = doc.getValues(field.name());
String[] values = getValues(doc.getFields(field.name()));
documentFields.add(new DocumentField(field.name(), Arrays.asList((Object[]) values)));
}
return generateTermVectors(indexShard,
XContentHelper.convertToMap(parsedDocument.source(), true, request.xContentType()).v2(), documentFields,
request.offsets(), request.perFieldAnalyzer(), seenFields);
}

/**
* Returns an array of values of the field specified as the method parameter.
* This method returns an empty array when there are no
* matching fields. It never returns null.
* @param fields The <code>IndexableField</code> to get the values from
* @return a <code>String[]</code> of field values
*/
public static String[] getValues(IndexableField[] fields) {
List<String> result = new ArrayList<>();
for (IndexableField field : fields) {
if (field.fieldType() instanceof KeywordFieldMapper.KeywordFieldType) {
result.add(field.binaryValue().utf8ToString());
} else if (field.fieldType() instanceof StringFieldType) {
result.add(field.stringValue());
}
}
return result.toArray(new String[0]);
}

private static ParsedDocument parseDocument(IndexShard indexShard, String index, BytesReference doc,
XContentType xContentType, String routing) {
MapperService mapperService = indexShard.mapperService();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.mapper.MapperService.MergeReason;
import org.elasticsearch.index.termvectors.TermVectorsService;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESSingleNodeTestCase;
import org.elasticsearch.test.InternalSettingsPlugin;
Expand Down Expand Up @@ -202,7 +203,7 @@ private void testIgnoreMalfomedForValue(String value, String expectedException)

IndexableField[] fields = doc.rootDoc().getFields("field");
assertEquals(0, fields.length);
assertArrayEquals(new String[] { "field" }, doc.rootDoc().getValues("_ignored"));
assertArrayEquals(new String[] { "field" }, TermVectorsService.getValues(doc.rootDoc().getFields("_ignored")));
}

public void testChangeFormat() throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,11 +154,15 @@ public void testDotsWithExistingMapper() throws Exception {
.endObject());
ParsedDocument doc = mapper.parse(new SourceToParse("test", "1", bytes, XContentType.JSON));
assertNull(doc.dynamicMappingsUpdate()); // no update!
String[] values = doc.rootDoc().getValues("foo.bar.baz");
assertEquals(3, values.length);
assertEquals("123", values[0]);
assertEquals("456", values[1]);
assertEquals("789", values[2]);

IndexableField[] fields = doc.rootDoc().getFields("foo.bar.baz");
assertEquals(6, fields.length);
assertEquals(123, fields[0].numericValue());
assertEquals("123", fields[1].stringValue());
assertEquals(456, fields[2].numericValue());
assertEquals("456", fields[3].stringValue());
assertEquals(789, fields[4].numericValue());
assertEquals("789", fields[5].stringValue());
}

public void testDotsWithExistingNestedMapper() throws Exception {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.termvectors.TermVectorsService;
import org.elasticsearch.test.ESSingleNodeTestCase;
import org.elasticsearch.test.VersionUtils;

Expand All @@ -52,7 +53,7 @@ private static SortedSet<String> set(String... values) {
}

void assertFieldNames(Set<String> expected, ParsedDocument doc) {
String[] got = doc.rootDoc().getValues("_field_names");
String[] got = TermVectorsService.getValues(doc.rootDoc().getFields("_field_names"));
assertEquals(expected, set(got));
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.IndexService;
import org.elasticsearch.index.termvectors.TermVectorsService;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESSingleNodeTestCase;
import org.elasticsearch.test.InternalSettingsPlugin;
Expand Down Expand Up @@ -194,7 +195,7 @@ public void testIgnoreMalformed() throws Exception {

IndexableField[] fields = doc.rootDoc().getFields("field");
assertEquals(0, fields.length);
assertArrayEquals(new String[] { "field" }, doc.rootDoc().getValues("_ignored"));
assertArrayEquals(new String[] { "field" }, TermVectorsService.getValues(doc.rootDoc().getFields("_ignored")));
}

public void testNullValue() throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
import org.elasticsearch.index.analysis.TokenizerFactory;
import org.elasticsearch.index.mapper.MapperService.MergeReason;
import org.elasticsearch.index.termvectors.TermVectorsService;
import org.elasticsearch.indices.analysis.AnalysisModule;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.plugins.Plugin;
Expand Down Expand Up @@ -128,6 +129,9 @@ public void testDefaults() throws Exception {
fieldType = fields[1].fieldType();
assertThat(fieldType.indexOptions(), equalTo(IndexOptions.NONE));
assertEquals(DocValuesType.SORTED_SET, fieldType.docValuesType());

// used by TermVectorsService
assertArrayEquals(new String[] { "1234" }, TermVectorsService.getValues(doc.rootDoc().getFields("field")));
}

public void testIgnoreAbove() throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
package org.elasticsearch.index.mapper;

import com.carrotsearch.randomizedtesting.annotations.Timeout;

import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexableField;
import org.elasticsearch.common.Strings;
Expand All @@ -32,6 +31,7 @@
import org.elasticsearch.common.xcontent.XContentType;
import org.elasticsearch.index.mapper.NumberFieldMapper.NumberType;
import org.elasticsearch.index.mapper.NumberFieldTypeTests.OutOfRangeSpec;
import org.elasticsearch.index.termvectors.TermVectorsService;

import java.io.ByteArrayInputStream;
import java.io.IOException;
Expand Down Expand Up @@ -250,7 +250,7 @@ public void testIgnoreMalformed() throws Exception {

IndexableField[] fields = doc.rootDoc().getFields("field");
assertEquals(0, fields.length);
assertArrayEquals(new String[] { "field" }, doc.rootDoc().getValues("_ignored"));
assertArrayEquals(new String[] { "field" }, TermVectorsService.getValues(doc.rootDoc().getFields("_ignored")));
}
}
}
Expand Down

0 comments on commit 1fc3fe3

Please sign in to comment.