Skip to content

Commit

Permalink
Add constraints to dimension fields (#74939)
Browse files Browse the repository at this point in the history
This PR adds the following constraints to dimension fields:

    It must be an indexed field and must has doc values
    It cannot be multi-valued
    The number of dimension fields in the index mapping must not be more than 16. This should be configurable through an index property (index.mapping.dimension_fields.limit)
    keyword fields cannot be more than 1024 bytes long
    keyword fields must not use a normalizer

Based on the code added in PR #74450
Relates to #74660
  • Loading branch information
csoulios authored Jul 9, 2021
1 parent 95469c3 commit 91a1591
Show file tree
Hide file tree
Showing 13 changed files with 254 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings {
MapperService.INDEX_MAPPING_NESTED_DOCS_LIMIT_SETTING,
MapperService.INDEX_MAPPING_TOTAL_FIELDS_LIMIT_SETTING,
MapperService.INDEX_MAPPING_DEPTH_LIMIT_SETTING,
MapperService.INDEX_MAPPING_DIMENSION_FIELDS_LIMIT_SETTING,
MapperService.INDEX_MAPPING_FIELD_NAME_LENGTH_LIMIT_SETTING,
BitsetFilterCache.INDEX_LOAD_RANDOM_ACCESS_FILTERS_EAGERLY_SETTING,
IndexModule.INDEX_STORE_TYPE_SETTING,
Expand Down
12 changes: 12 additions & 0 deletions server/src/main/java/org/elasticsearch/index/IndexSettings.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import java.util.function.Function;

import static org.elasticsearch.index.mapper.MapperService.INDEX_MAPPING_DEPTH_LIMIT_SETTING;
import static org.elasticsearch.index.mapper.MapperService.INDEX_MAPPING_DIMENSION_FIELDS_LIMIT_SETTING;
import static org.elasticsearch.index.mapper.MapperService.INDEX_MAPPING_FIELD_NAME_LENGTH_LIMIT_SETTING;
import static org.elasticsearch.index.mapper.MapperService.INDEX_MAPPING_NESTED_DOCS_LIMIT_SETTING;
import static org.elasticsearch.index.mapper.MapperService.INDEX_MAPPING_NESTED_FIELDS_LIMIT_SETTING;
Expand Down Expand Up @@ -379,6 +380,7 @@ private void setRetentionLeaseMillis(final TimeValue retentionLease) {
private volatile long mappingTotalFieldsLimit;
private volatile long mappingDepthLimit;
private volatile long mappingFieldNameLengthLimit;
private volatile long mappingDimensionFieldsLimit;

/**
* The maximum number of refresh listeners allows on this shard.
Expand Down Expand Up @@ -503,6 +505,7 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti
mappingTotalFieldsLimit = scopedSettings.get(INDEX_MAPPING_TOTAL_FIELDS_LIMIT_SETTING);
mappingDepthLimit = scopedSettings.get(INDEX_MAPPING_DEPTH_LIMIT_SETTING);
mappingFieldNameLengthLimit = scopedSettings.get(INDEX_MAPPING_FIELD_NAME_LENGTH_LIMIT_SETTING);
mappingDimensionFieldsLimit = scopedSettings.get(INDEX_MAPPING_DIMENSION_FIELDS_LIMIT_SETTING);

scopedSettings.addSettingsUpdateConsumer(MergePolicyConfig.INDEX_COMPOUND_FORMAT_SETTING, mergePolicyConfig::setNoCFSRatio);
scopedSettings.addSettingsUpdateConsumer(MergePolicyConfig.INDEX_MERGE_POLICY_DELETES_PCT_ALLOWED_SETTING,
Expand Down Expand Up @@ -558,6 +561,7 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti
scopedSettings.addSettingsUpdateConsumer(INDEX_MAPPING_TOTAL_FIELDS_LIMIT_SETTING, this::setMappingTotalFieldsLimit);
scopedSettings.addSettingsUpdateConsumer(INDEX_MAPPING_DEPTH_LIMIT_SETTING, this::setMappingDepthLimit);
scopedSettings.addSettingsUpdateConsumer(INDEX_MAPPING_FIELD_NAME_LENGTH_LIMIT_SETTING, this::setMappingFieldNameLengthLimit);
scopedSettings.addSettingsUpdateConsumer(INDEX_MAPPING_DIMENSION_FIELDS_LIMIT_SETTING, this::setMappingDimensionFieldsLimit);
}

private void setSearchIdleAfter(TimeValue searchIdleAfter) { this.searchIdleAfter = searchIdleAfter; }
Expand Down Expand Up @@ -1021,4 +1025,12 @@ public long getMappingFieldNameLengthLimit() {
private void setMappingFieldNameLengthLimit(long value) {
this.mappingFieldNameLengthLimit = value;
}

public long getMappingDimensionFieldsLimit() {
return mappingDimensionFieldsLimit;
}

private void setMappingDimensionFieldsLimit(long value) {
this.mappingDimensionFieldsLimit = value;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

package org.elasticsearch.index.mapper;

import org.apache.lucene.document.Field;
import org.apache.lucene.document.InetAddressPoint;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StoredField;
Expand Down Expand Up @@ -72,8 +73,7 @@ public static class Builder extends FieldMapper.Builder {
private final Parameter<String> onScriptError = Parameter.onScriptErrorParam(m -> toType(m).onScriptError, script);

private final Parameter<Map<String, String>> meta = Parameter.metaParam();
private final Parameter<Boolean> dimension
= Parameter.boolParam("dimension", false, m -> toType(m).dimension, false);
private final Parameter<Boolean> dimension;

private final boolean ignoreMalformedByDefault;
private final Version indexCreatedVersion;
Expand All @@ -88,6 +88,14 @@ public Builder(String name, ScriptCompiler scriptCompiler, boolean ignoreMalform
= Parameter.boolParam("ignore_malformed", true, m -> toType(m).ignoreMalformed, ignoreMalformedByDefault);
this.script.precludesParameters(nullValue, ignoreMalformed);
addScriptValidation(script, indexed, hasDocValues);
this.dimension = Parameter.boolParam("dimension", false, m -> toType(m).dimension, false)
.setValidator(v -> {
if (v && (indexed.getValue() == false || hasDocValues.getValue() == false)) {
throw new IllegalArgumentException(
"Field [dimension] requires that [" + indexed.name + "] and [" + hasDocValues.name + "] are true"
);
}
});
}

Builder nullValue(String nullValue) {
Expand Down Expand Up @@ -467,7 +475,17 @@ protected void parseCreateField(DocumentParserContext context) throws IOExceptio

private void indexValue(DocumentParserContext context, InetAddress address) {
if (indexed) {
context.doc().add(new InetAddressPoint(fieldType().name(), address));
Field field = new InetAddressPoint(fieldType().name(), address);
if (dimension) {
// Add dimension field with key so that we ensure it is single-valued.
// Dimension fields are always indexed.
if (context.doc().getByKey(fieldType().name()) != null) {
throw new IllegalArgumentException("Dimension field [" + fieldType().name() + "] cannot be a multi-valued field.");
}
context.doc().addWithKey(fieldType().name(), field);
} else {
context.doc().add(field);
}
}
if (hasDocValues) {
context.doc().add(new SortedSetDocValuesField(fieldType().name(), new BytesRef(InetAddressPoint.encode(address))));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,12 +124,13 @@ public Builder(String name, IndexAnalyzers indexAnalyzers, ScriptCompiler script
this.script.precludesParameters(nullValue);
addScriptValidation(script, indexed, hasDocValues);

this.dimension = Parameter.boolParam("dimension", false, m -> toType(m).dimension, false)
.setValidator(v -> {
if (v && ignoreAbove.getValue() < ignoreAbove.getDefaultValue()) {
throw new IllegalArgumentException("Field [ignore_above] cannot be set in conjunction with field [dimension]");
}
});
this.dimension = Parameter.boolParam("dimension", false, m -> toType(m).dimension, false).setValidator(v -> {
if (v && (indexed.getValue() == false || hasDocValues.getValue() == false)) {
throw new IllegalArgumentException(
"Field [dimension] requires that [" + indexed.name + "] and [" + hasDocValues.name + "] are true"
);
}
}).precludesParameters(normalizer, ignoreAbove);
}

public Builder(String name) {
Expand Down Expand Up @@ -431,6 +432,9 @@ public boolean isDimension() {
}
}

/** The maximum keyword length allowed for a dimension field */
private static final int DIMENSION_MAX_BYTES = 1024;

private final boolean indexed;
private final boolean hasDocValues;
private final String nullValue;
Expand Down Expand Up @@ -509,9 +513,24 @@ private void indexValue(DocumentParserContext context, String value) {

// convert to utf8 only once before feeding postings/dv/stored fields
final BytesRef binaryValue = new BytesRef(value);
if (dimension && binaryValue.length > DIMENSION_MAX_BYTES) {
throw new IllegalArgumentException(
"Dimension field [" + fieldType().name() + "] cannot be more than [" + DIMENSION_MAX_BYTES + "] bytes long."
);
}
if (fieldType.indexOptions() != IndexOptions.NONE || fieldType.stored()) {
Field field = new KeywordField(fieldType().name(), binaryValue, fieldType);
context.doc().add(field);
if (dimension) {
// Check that a dimension field is single-valued and not an array
if (context.doc().getByKey(fieldType().name()) != null) {
throw new IllegalArgumentException("Dimension field [" + fieldType().name() + "] cannot be a multi-valued field.");
}
// Add dimension field with key so that we ensure it is single-valued.
// Dimension fields are always indexed.
context.doc().addWithKey(fieldType().name(), field);
} else {
context.doc().add(field);
}

if (fieldType().hasDocValues() == false && fieldType.omitNorms()) {
context.addToFieldNames(fieldType().name());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,13 @@ public boolean isAggregatable() {
}
}

/**
* @return true if field has been marked as a dimension field
*/
public boolean isDimension() {
return false;
}

/** Generates a query that will only match documents that contain the given value.
* The default implementation returns a {@link TermQuery} over the value bytes
* @throws IllegalArgumentException if {@code value} cannot be converted to the expected data type or if the field is not searchable
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ public enum MergeReason {
Setting.longSetting("index.mapping.depth.limit", 20L, 1, Property.Dynamic, Property.IndexScope);
public static final Setting<Long> INDEX_MAPPING_FIELD_NAME_LENGTH_LIMIT_SETTING =
Setting.longSetting("index.mapping.field_name_length.limit", Long.MAX_VALUE, 1L, Property.Dynamic, Property.IndexScope);
public static final Setting<Long> INDEX_MAPPING_DIMENSION_FIELDS_LIMIT_SETTING =
Setting.longSetting("index.mapping.dimension_fields.limit", 16, 0, Property.Dynamic, Property.IndexScope);


private final IndexAnalyzers indexAnalyzers;
private final MappingParser mappingParser;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ void checkLimits(IndexSettings settings) {
checkObjectDepthLimit(settings.getMappingDepthLimit());
checkFieldNameLengthLimit(settings.getMappingFieldNameLengthLimit());
checkNestedLimit(settings.getMappingNestedFieldsLimit());
checkDimensionFieldLimit(settings.getMappingDimensionFieldsLimit());
}

private void checkFieldLimit(long limit) {
Expand All @@ -217,6 +218,16 @@ void checkFieldLimit(long limit, int additionalFieldsToAdd) {
}
}

private void checkDimensionFieldLimit(long limit) {
long dimensionFieldCount = fieldMappers.values()
.stream()
.filter(m -> m instanceof FieldMapper && ((FieldMapper) m).fieldType().isDimension())
.count();
if (dimensionFieldCount > limit) {
throw new IllegalArgumentException("Limit of total dimension fields [" + limit + "] has been exceeded");
}
}

private void checkObjectDepthLimit(long limit) {
for (String objectPath : objectMappers.keySet()) {
int numDots = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,11 @@ public Builder(String name, NumberType type, ScriptCompiler compiler, boolean ig
if (v && EnumSet.of(NumberType.INTEGER, NumberType.LONG, NumberType.BYTE, NumberType.SHORT).contains(type) == false) {
throw new IllegalArgumentException("Parameter [dimension] cannot be set to numeric type [" + type.name + "]");
}
if (v && (indexed.getValue() == false || hasDocValues.getValue() == false)) {
throw new IllegalArgumentException(
"Field [dimension] requires that [" + indexed.name + "] and [" + hasDocValues.name + "] are true"
);
}
});

this.script.precludesParameters(ignoreMalformed, coerce, nullValue);
Expand Down Expand Up @@ -1174,8 +1179,20 @@ private static Number value(XContentParser parser, NumberType numberType, Number
}

private void indexValue(DocumentParserContext context, Number numericValue) {
context.doc().addAll(fieldType().type.createFields(fieldType().name(), numericValue,
indexed, hasDocValues, stored));
List<Field> fields = fieldType().type.createFields(fieldType().name(), numericValue, indexed, hasDocValues, stored);
if (dimension) {
// Check that a dimension field is single-valued and not an array
if (context.doc().getByKey(fieldType().name()) != null) {
throw new IllegalArgumentException("Dimension field [" + fieldType().name() + "] cannot be a multi-valued field.");
}
if (fields.size() > 0) {
// Add the first field by key so that we can validate if it has been added
context.doc().addWithKey(fieldType().name(), fields.get(0));
context.doc().addAll(fields.subList(1, fields.size()));
}
} else {
context.doc().addAll(fields);
}

if (hasDocValues == false && (stored || indexed)) {
context.addToFieldNames(fieldType().name());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -287,4 +287,17 @@ public void testEmptyDocumentMapper() {
assertEquals(10, documentMapper.mappers().getMapping().getMetadataMappersMap().size());
assertEquals(10, documentMapper.mappers().getMatchingFieldNames("*").size());
}

public void testTooManyDimensionFields() {
// By default no more than 16 dimensions per document are supported
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> createDocumentMapper(mapping(b -> {
for (int i = 0; i < 17; i++) {
b.startObject("field" + i)
.field("type", randomFrom("ip", "keyword", "long", "integer", "byte", "short"))
.field("dimension", true)
.endObject();
}
})));
assertThat(e.getMessage(), containsString("Limit of total dimension fields [16] has been exceeded"));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,45 @@ public void testDimension() throws IOException {
assertDimension(false, IpFieldMapper.IpFieldType::isDimension);
}

public void testDimensionIndexedAndDocvalues() {
{
Exception e = expectThrows(MapperParsingException.class, () -> createDocumentMapper(fieldMapping(b -> {
minimalMapping(b);
b.field("dimension", true).field("index", false).field("doc_values", false);
})));
assertThat(e.getCause().getMessage(),
containsString("Field [dimension] requires that [index] and [doc_values] are true"));
}
{
Exception e = expectThrows(MapperParsingException.class, () -> createDocumentMapper(fieldMapping(b -> {
minimalMapping(b);
b.field("dimension", true).field("index", true).field("doc_values", false);
})));
assertThat(e.getCause().getMessage(),
containsString("Field [dimension] requires that [index] and [doc_values] are true"));
}
{
Exception e = expectThrows(MapperParsingException.class, () -> createDocumentMapper(fieldMapping(b -> {
minimalMapping(b);
b.field("dimension", true).field("index", false).field("doc_values", true);
})));
assertThat(e.getCause().getMessage(),
containsString("Field [dimension] requires that [index] and [doc_values] are true"));
}
}

public void testDimensionMultiValuedField() throws IOException {
DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> {
minimalMapping(b);
b.field("dimension", true);
}));

Exception e = expectThrows(MapperParsingException.class,
() -> mapper.parse(source(b -> b.array("field", "192.168.1.1", "192.168.1.1"))));
assertThat(e.getCause().getMessage(),
containsString("Dimension field [field] cannot be a multi-valued field"));
}

@Override
protected String generateRandomInputValue(MappedFieldType ft) {
return NetworkAddress.format(randomIp(randomBoolean()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,66 @@ public void testDimensionAndIgnoreAbove() {
containsString("Field [ignore_above] cannot be set in conjunction with field [dimension]"));
}

public void testDimensionAndNormalizer() {
Exception e = expectThrows(MapperParsingException.class, () -> createDocumentMapper(fieldMapping(b -> {
minimalMapping(b);
b.field("dimension", true).field("normalizer", "my_normalizer");
})));
assertThat(e.getCause().getMessage(),
containsString("Field [normalizer] cannot be set in conjunction with field [dimension]"));
}

public void testDimensionIndexedAndDocvalues() {
{
Exception e = expectThrows(MapperParsingException.class, () -> createDocumentMapper(fieldMapping(b -> {
minimalMapping(b);
b.field("dimension", true).field("index", false).field("doc_values", false);
})));
assertThat(e.getCause().getMessage(),
containsString("Field [dimension] requires that [index] and [doc_values] are true"));
}
{
Exception e = expectThrows(MapperParsingException.class, () -> createDocumentMapper(fieldMapping(b -> {
minimalMapping(b);
b.field("dimension", true).field("index", true).field("doc_values", false);
})));
assertThat(e.getCause().getMessage(),
containsString("Field [dimension] requires that [index] and [doc_values] are true"));
}
{
Exception e = expectThrows(MapperParsingException.class, () -> createDocumentMapper(fieldMapping(b -> {
minimalMapping(b);
b.field("dimension", true).field("index", false).field("doc_values", true);
})));
assertThat(e.getCause().getMessage(),
containsString("Field [dimension] requires that [index] and [doc_values] are true"));
}
}

public void testDimensionMultiValuedField() throws IOException {
DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> {
minimalMapping(b);
b.field("dimension", true);
}));

Exception e = expectThrows(MapperParsingException.class,
() -> mapper.parse(source(b -> b.array("field", "1234", "45678"))));
assertThat(e.getCause().getMessage(),
containsString("Dimension field [field] cannot be a multi-valued field"));
}

public void testDimensionExtraLongKeyword() throws IOException {
DocumentMapper mapper = createDocumentMapper(fieldMapping(b -> {
minimalMapping(b);
b.field("dimension", true);
}));

Exception e = expectThrows(MapperParsingException.class,
() -> mapper.parse(source(b -> b.field("field", randomAlphaOfLengthBetween(1024, 2048)))));
assertThat(e.getCause().getMessage(),
containsString("Dimension field [field] cannot be more than [1024] bytes long."));
}

public void testConfigureSimilarity() throws IOException {
MapperService mapperService = createMapperService(
fieldMapping(b -> b.field("type", "keyword").field("similarity", "boolean"))
Expand Down
Loading

0 comments on commit 91a1591

Please sign in to comment.