From 64143b8357c785d47e3b1f33a1b6a29f4aabb757 Mon Sep 17 00:00:00 2001 From: Julie Tibshirani Date: Thu, 9 Jul 2020 09:41:47 -0700 Subject: [PATCH] Apply keyword normalizers in the field retrieval API. (#59260) As we discussed in the meta-issue, when returning `keyword` fields in the fields retrieval API, we'll apply their `normalizer`. This decision is not a clear-cut one, and we'll validate it with internal users before merging the feature branch. --- .../index/mapper/KeywordFieldMapper.java | 53 ++++++++++++------- .../index/mapper/KeywordFieldMapperTests.java | 7 +++ 2 files changed, 41 insertions(+), 19 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java index df4de525b7ada..291aed7ef96f3 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java @@ -47,6 +47,7 @@ import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; import java.io.IOException; +import java.io.UncheckedIOException; import java.util.Collections; import java.util.Iterator; import java.util.List; @@ -361,25 +362,9 @@ protected void parseCreateField(ParseContext context) throws IOException { return; } - final NamedAnalyzer normalizer = fieldType().normalizer(); + NamedAnalyzer normalizer = fieldType().normalizer(); if (normalizer != null) { - try (TokenStream ts = normalizer.tokenStream(name(), value)) { - final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); - ts.reset(); - if (ts.incrementToken() == false) { - throw new IllegalStateException("The normalization token stream is " - + "expected to produce exactly 1 token, but got 0 for analyzer " - + normalizer + " and input \"" + value + "\""); - } - final String newValue = termAtt.toString(); - if (ts.incrementToken()) { - throw new IllegalStateException("The normalization 
token stream is " - + "expected to produce exactly 1 token, but got 2+ for analyzer " - + normalizer + " and input \"" + value + "\""); - } - ts.end(); - value = newValue; - } + value = normalizeValue(normalizer, value); } // convert to utf8 only once before feeding postings/dv/stored fields @@ -398,6 +383,26 @@ protected void parseCreateField(ParseContext context) throws IOException { } } + private String normalizeValue(NamedAnalyzer normalizer, String value) throws IOException { + try (TokenStream ts = normalizer.tokenStream(name(), value)) { + final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); + ts.reset(); + if (ts.incrementToken() == false) { + throw new IllegalStateException("The normalization token stream is " + + "expected to produce exactly 1 token, but got 0 for analyzer " + + normalizer + " and input \"" + value + "\""); + } + final String newValue = termAtt.toString(); + if (ts.incrementToken()) { + throw new IllegalStateException("The normalization token stream is " + + "expected to produce exactly 1 token, but got 2+ for analyzer " + + normalizer + " and input \"" + value + "\""); + } + ts.end(); + return newValue; + } + } + @Override protected String parseSourceValue(Object value, String format) { if (format != null) { @@ -408,7 +413,17 @@ protected String parseSourceValue(Object value, String format) { if (keywordValue.length() > ignoreAbove) { return null; } - return keywordValue; + + NamedAnalyzer normalizer = fieldType().normalizer(); + if (normalizer == null) { + return keywordValue; + } + + try { + return normalizeValue(normalizer, keywordValue); + } catch (IOException e) { + throw new UncheckedIOException(e); + } } @Override diff --git a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java index 0c6302779f6bf..ecc3e507af7d3 100644 --- 
a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java @@ -650,6 +650,13 @@ public void testParseSourceValue() { assertEquals("42", ignoreAboveMapper.parseSourceValue(42L, null)); assertEquals("true", ignoreAboveMapper.parseSourceValue(true, null)); + KeywordFieldMapper normalizerMapper = new KeywordFieldMapper.Builder("field") + .normalizer(indexService.getIndexAnalyzers(), "lowercase") + .build(context); + assertEquals("value", normalizerMapper.parseSourceValue("VALUE", null)); + assertEquals("42", normalizerMapper.parseSourceValue(42L, null)); + assertEquals("value", normalizerMapper.parseSourceValue("value", null)); + KeywordFieldMapper nullValueMapper = new KeywordFieldMapper.Builder("field") .nullValue("NULL") .build(context);