Skip to content

Commit

Permalink
[ML] Automatic management for ML system indices (#68044)
Browse files Browse the repository at this point in the history
The ML system indices now use the special functionality for
applying the correct mappings on first use. This replaces
the index templates that used to do this job, but were
vulnerable to tampering.

A number of other changes have had to be made to utilise
the system index functionality:

1. All fields previously missed out of mappings have been
   added to the system index mappings, with the types that
   would have been assigned dynamically in previous
   versions.  This is necessary because dynamic mapping
   updates are banned for system indices, yet some of our
   mappings allow dynamic updates.
2. As a result of the contradiction regarding dynamic
   mappings, we are now very well protected against failing
   to add new fields to the mappings for those indices that
   exhibit the contradiction (which are .ml-config and
   .ml-meta).  This means their mappings don't need to be
   explicitly compared to expected mappings in upgrade
   tests now.  Instead, any usage of a new field during or
   after upgrade will trigger an error in whichever test it
   occurs in.
3. Reserved fields for the config index were unnecessary
   (only used by tests) and just added extra complication,
   so they have been removed.  We have the concept of
   reserved fields for our results indices because end user
   fields get added to results and we need to ensure they
   don't clash with fields we want to use ourselves.  This
   problem does not exist for the config index.
  • Loading branch information
droberts195 authored Feb 5, 2021
1 parent e2d5183 commit 5f5968b
Show file tree
Hide file tree
Showing 34 changed files with 428 additions and 534 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ static String checkForSystemIndexViolations(SystemIndices systemIndices, Index[]
return "Cannot update mappings in "
+ violations
+ ": system indices can only use mappings from their descriptors, "
+ "but the mappings in the request did not match those in the descriptors(s)";
+ "but the mappings in the request [" + requestMappings + "] did not match those in the descriptor(s)";
}

return null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ public static class Builder {
private String indexPattern;
private String primaryIndex;
private String description;
private XContentBuilder mappingsBuilder = null;
private String mappings = null;
private Settings settings = null;
private String aliasName = null;
private int indexFormat = 0;
Expand All @@ -291,7 +291,12 @@ public Builder setDescription(String description) {
}

/**
 * Sets the mappings from an {@link XContentBuilder}.
 *
 * @param mappingsBuilder builder holding the mappings; may be {@code null}
 * @return this builder
 */
public Builder setMappings(XContentBuilder mappingsBuilder) {
// NOTE(review): this view appears to show both the pre-change and post-change assignment
// from a diff; confirm which of the next two statements actually applies.
this.mappingsBuilder = mappingsBuilder;
// A null builder is recorded as null mappings; otherwise it is rendered with Strings.toString.
mappings = mappingsBuilder == null ? null : Strings.toString(mappingsBuilder);
return this;
}

/**
 * Sets the mappings directly from a string.
 *
 * @param mappings the mappings to store on this builder; assigned as-is, with no checks in this method
 * @return this builder
 */
public Builder setMappings(String mappings) {
this.mappings = mappings;
return this;
}

Expand Down Expand Up @@ -330,7 +335,6 @@ public Builder setMinimumNodeVersion(Version version) {
* @return a populated descriptor.
*/
public SystemIndexDescriptor build() {
String mappings = mappingsBuilder == null ? null : Strings.toString(mappingsBuilder);

return new SystemIndexDescriptor(
indexPattern,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,18 @@
package org.elasticsearch.xpack.core.ml;

import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.xpack.core.template.TemplateUtils;

public final class MlConfigIndex {

private static final String INDEX_NAME = ".ml-config";
private static final String MAPPINGS_VERSION_VARIABLE = "xpack.ml.version";

public static final int CONFIG_INDEX_MAX_RESULTS_WINDOW = 10_000;

/**
* The name of the index where job, datafeed and analytics configuration is stored
*
Expand All @@ -30,5 +35,13 @@ public static String mapping() {
MAPPINGS_VERSION_VARIABLE);
}

/**
 * Builds the settings for the config index.
 *
 * <p>Sets {@code IndexMetadata.SETTING_NUMBER_OF_SHARDS} to 1,
 * {@code IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS} to "0-1", and the max result window
 * setting to {@link #CONFIG_INDEX_MAX_RESULTS_WINDOW}.
 *
 * @return the built settings
 */
public static Settings settings() {
    Settings.Builder configSettings = Settings.builder();
    configSettings.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1);
    configSettings.put(IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS, "0-1");
    configSettings.put(IndexSettings.MAX_RESULT_WINDOW_SETTING.getKey(), CONFIG_INDEX_MAX_RESULTS_WINDOW);
    return configSettings.build();
}

// Private, empty constructor: this class cannot be instantiated from outside.
private MlConfigIndex() {}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,15 @@
*/
package org.elasticsearch.xpack.core.ml;

import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.xpack.core.template.TemplateUtils;

public final class MlMetaIndex {

private static final String INDEX_NAME = ".ml-meta";
private static final String MAPPINGS_VERSION_VARIABLE = "xpack.ml.version";

/**
* Where to store the ml info in Elasticsearch - must match what's
Expand All @@ -20,5 +26,19 @@ public static String indexName() {
return INDEX_NAME;
}

/**
 * Returns the meta index mappings as produced by {@code TemplateUtils.loadTemplate}.
 *
 * <p>The call is given the mappings JSON resource path, the string form of
 * {@code Version.CURRENT}, and {@code MAPPINGS_VERSION_VARIABLE} (presumably the
 * placeholder the version is substituted for; confirm against TemplateUtils).
 *
 * @return the string returned by {@code TemplateUtils.loadTemplate}
 */
public static String mapping() {
    final String resourcePath = "/org/elasticsearch/xpack/core/ml/meta_index_mappings.json";
    final String currentVersion = Version.CURRENT.toString();
    return TemplateUtils.loadTemplate(resourcePath, currentVersion, MAPPINGS_VERSION_VARIABLE);
}

/**
 * Builds the settings for the meta index.
 *
 * <p>Sets {@code IndexMetadata.SETTING_NUMBER_OF_SHARDS} to 1 and
 * {@code IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS} to "0-1".
 *
 * @return the built settings
 */
public static Settings settings() {
    Settings.Builder metaSettings = Settings.builder();
    metaSettings.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1);
    metaSettings.put(IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS, "0-1");
    return metaSettings.build();
}

// Private, empty constructor: this class cannot be instantiated from outside.
private MlMetaIndex() {}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,11 @@
*/
package org.elasticsearch.xpack.core.ml.inference.persistence;

import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.xpack.core.template.TemplateUtils;

/**
* Class containing the index constants so that the index version, name, and prefix are available to a wider audience.
Expand All @@ -29,5 +33,21 @@ public final class InferenceIndexConstants {
public static final String LATEST_INDEX_NAME = INDEX_NAME_PREFIX + INDEX_VERSION;
public static final ParseField DOC_TYPE = new ParseField("doc_type");

private static final String MAPPINGS_VERSION_VARIABLE = "xpack.ml.version";

/**
 * Returns the inference index mappings as produced by {@code TemplateUtils.loadTemplate}.
 *
 * <p>The arguments are the mappings JSON resource path, {@code Version.CURRENT} rendered
 * as a string, and {@code MAPPINGS_VERSION_VARIABLE} (presumably the variable name the
 * version replaces in the template; confirm against TemplateUtils).
 *
 * @return the string returned by {@code TemplateUtils.loadTemplate}
 */
public static String mapping() {
    final String mappingsResource = "/org/elasticsearch/xpack/core/ml/inference_index_mappings.json";
    final String versionString = Version.CURRENT.toString();
    return TemplateUtils.loadTemplate(mappingsResource, versionString, MAPPINGS_VERSION_VARIABLE);
}

/**
 * Builds the settings for the inference index.
 *
 * <p>Sets {@code IndexMetadata.SETTING_NUMBER_OF_SHARDS} to 1 and
 * {@code IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS} to "0-1".
 *
 * @return the built settings
 */
public static Settings settings() {
    Settings.Builder inferenceSettings = Settings.builder();
    inferenceSettings.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1);
    inferenceSettings.put(IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS, "0-1");
    return inferenceSettings.build();
}

// Private, empty constructor: this class cannot be instantiated from outside.
private InferenceIndexConstants() {}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@
*/
public final class AnomalyDetectorsIndex {

public static final int CONFIG_INDEX_MAX_RESULTS_WINDOW = 10_000;

private static final String RESULTS_MAPPINGS_VERSION_VARIABLE = "xpack.ml.version";
private static final String RESOURCE_PATH = "/org/elasticsearch/xpack/core/ml/anomalydetection/";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ public static void addDocMappingIfMissing(String alias,
String mapping = mappingSupplier.get();
PutMappingRequest putMappingRequest = new PutMappingRequest(indicesThatRequireAnUpdate);
putMappingRequest.source(mapping, XContentType.JSON);
putMappingRequest.origin(ML_ORIGIN);
executeAsyncWithOrigin(client, ML_ORIGIN, PutMappingAction.INSTANCE, putMappingRequest,
ActionListener.wrap(response -> {
if (response.isAcknowledged()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,27 +7,9 @@
package org.elasticsearch.xpack.core.ml.job.results;

import org.elasticsearch.index.get.GetResult;
import org.elasticsearch.xpack.core.ml.datafeed.ChunkingConfig;
import org.elasticsearch.xpack.core.ml.datafeed.DatafeedConfig;
import org.elasticsearch.xpack.core.ml.datafeed.DatafeedTimingStats;
import org.elasticsearch.xpack.core.ml.datafeed.DelayedDataCheckConfig;
import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig;
import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsDest;
import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsSource;
import org.elasticsearch.xpack.core.ml.dataframe.analyses.BoostedTreeParams;
import org.elasticsearch.xpack.core.ml.dataframe.analyses.Classification;
import org.elasticsearch.xpack.core.ml.dataframe.analyses.OutlierDetection;
import org.elasticsearch.xpack.core.ml.dataframe.analyses.Regression;
import org.elasticsearch.xpack.core.ml.job.config.AnalysisConfig;
import org.elasticsearch.xpack.core.ml.job.config.AnalysisLimits;
import org.elasticsearch.xpack.core.ml.job.config.DataDescription;
import org.elasticsearch.xpack.core.ml.job.config.DetectionRule;
import org.elasticsearch.xpack.core.ml.job.config.Detector;
import org.elasticsearch.xpack.core.ml.job.config.Job;
import org.elasticsearch.xpack.core.ml.job.config.ModelPlotConfig;
import org.elasticsearch.xpack.core.ml.job.config.Operator;
import org.elasticsearch.xpack.core.ml.job.config.PerPartitionCategorizationConfig;
import org.elasticsearch.xpack.core.ml.job.config.RuleCondition;
import org.elasticsearch.xpack.core.ml.job.persistence.ElasticsearchMappings;
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.DataCounts;
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.ModelSizeStats;
Expand Down Expand Up @@ -214,148 +196,6 @@ public final class ReservedFieldNames {

GetResult._ID,
GetResult._INDEX
};

/**
* This array should be updated to contain all the field names that appear
* in any documents we store in our config index.
*/
private static final String[] RESERVED_CONFIG_FIELD_NAME_ARRAY = {
Job.ID.getPreferredName(),
Job.JOB_TYPE.getPreferredName(),
Job.JOB_VERSION.getPreferredName(),
Job.GROUPS.getPreferredName(),
Job.ANALYSIS_CONFIG.getPreferredName(),
Job.ANALYSIS_LIMITS.getPreferredName(),
Job.CREATE_TIME.getPreferredName(),
Job.CUSTOM_SETTINGS.getPreferredName(),
Job.DATA_DESCRIPTION.getPreferredName(),
Job.DESCRIPTION.getPreferredName(),
Job.FINISHED_TIME.getPreferredName(),
Job.MODEL_PLOT_CONFIG.getPreferredName(),
Job.RENORMALIZATION_WINDOW_DAYS.getPreferredName(),
Job.BACKGROUND_PERSIST_INTERVAL.getPreferredName(),
Job.MODEL_SNAPSHOT_RETENTION_DAYS.getPreferredName(),
Job.DAILY_MODEL_SNAPSHOT_RETENTION_AFTER_DAYS.getPreferredName(),
Job.RESULTS_RETENTION_DAYS.getPreferredName(),
Job.MODEL_SNAPSHOT_ID.getPreferredName(),
Job.MODEL_SNAPSHOT_MIN_VERSION.getPreferredName(),
Job.RESULTS_INDEX_NAME.getPreferredName(),
Job.ALLOW_LAZY_OPEN.getPreferredName(),

AnalysisConfig.BUCKET_SPAN.getPreferredName(),
AnalysisConfig.CATEGORIZATION_FIELD_NAME.getPreferredName(),
AnalysisConfig.CATEGORIZATION_FILTERS.getPreferredName(),
AnalysisConfig.CATEGORIZATION_ANALYZER.getPreferredName(),
AnalysisConfig.PER_PARTITION_CATEGORIZATION.getPreferredName(),
AnalysisConfig.LATENCY.getPreferredName(),
AnalysisConfig.SUMMARY_COUNT_FIELD_NAME.getPreferredName(),
AnalysisConfig.DETECTORS.getPreferredName(),
AnalysisConfig.INFLUENCERS.getPreferredName(),
AnalysisConfig.MULTIVARIATE_BY_FIELDS.getPreferredName(),

AnalysisLimits.MODEL_MEMORY_LIMIT.getPreferredName(),
AnalysisLimits.CATEGORIZATION_EXAMPLES_LIMIT.getPreferredName(),

Detector.DETECTOR_DESCRIPTION_FIELD.getPreferredName(),
Detector.FUNCTION_FIELD.getPreferredName(),
Detector.FIELD_NAME_FIELD.getPreferredName(),
Detector.BY_FIELD_NAME_FIELD.getPreferredName(),
Detector.OVER_FIELD_NAME_FIELD.getPreferredName(),
Detector.PARTITION_FIELD_NAME_FIELD.getPreferredName(),
Detector.USE_NULL_FIELD.getPreferredName(),
Detector.EXCLUDE_FREQUENT_FIELD.getPreferredName(),
Detector.CUSTOM_RULES_FIELD.getPreferredName(),
Detector.DETECTOR_INDEX.getPreferredName(),

DetectionRule.ACTIONS_FIELD.getPreferredName(),
DetectionRule.CONDITIONS_FIELD.getPreferredName(),
DetectionRule.SCOPE_FIELD.getPreferredName(),
RuleCondition.APPLIES_TO_FIELD.getPreferredName(),
RuleCondition.VALUE_FIELD.getPreferredName(),
Operator.OPERATOR_FIELD.getPreferredName(),

DataDescription.FORMAT_FIELD.getPreferredName(),
DataDescription.TIME_FIELD_NAME_FIELD.getPreferredName(),
DataDescription.TIME_FORMAT_FIELD.getPreferredName(),
DataDescription.FIELD_DELIMITER_FIELD.getPreferredName(),
DataDescription.QUOTE_CHARACTER_FIELD.getPreferredName(),

ModelPlotConfig.ENABLED_FIELD.getPreferredName(),
ModelPlotConfig.TERMS_FIELD.getPreferredName(),
ModelPlotConfig.ANNOTATIONS_ENABLED_FIELD.getPreferredName(),

PerPartitionCategorizationConfig.STOP_ON_WARN.getPreferredName(),

DatafeedConfig.ID.getPreferredName(),
DatafeedConfig.QUERY_DELAY.getPreferredName(),
DatafeedConfig.FREQUENCY.getPreferredName(),
DatafeedConfig.INDICES.getPreferredName(),
DatafeedConfig.QUERY.getPreferredName(),
DatafeedConfig.SCROLL_SIZE.getPreferredName(),
DatafeedConfig.AGGREGATIONS.getPreferredName(),
DatafeedConfig.SCRIPT_FIELDS.getPreferredName(),
DatafeedConfig.CHUNKING_CONFIG.getPreferredName(),
DatafeedConfig.HEADERS.getPreferredName(),
DatafeedConfig.DELAYED_DATA_CHECK_CONFIG.getPreferredName(),
DatafeedConfig.INDICES_OPTIONS.getPreferredName(),
DelayedDataCheckConfig.ENABLED.getPreferredName(),
DelayedDataCheckConfig.CHECK_WINDOW.getPreferredName(),

ChunkingConfig.MODE_FIELD.getPreferredName(),
ChunkingConfig.TIME_SPAN_FIELD.getPreferredName(),

DataFrameAnalyticsConfig.ID.getPreferredName(),
DataFrameAnalyticsConfig.DESCRIPTION.getPreferredName(),
DataFrameAnalyticsConfig.SOURCE.getPreferredName(),
DataFrameAnalyticsConfig.DEST.getPreferredName(),
DataFrameAnalyticsConfig.ANALYSIS.getPreferredName(),
DataFrameAnalyticsConfig.ANALYZED_FIELDS.getPreferredName(),
DataFrameAnalyticsConfig.CREATE_TIME.getPreferredName(),
DataFrameAnalyticsConfig.VERSION.getPreferredName(),
DataFrameAnalyticsConfig.MAX_NUM_THREADS.getPreferredName(),
DataFrameAnalyticsDest.INDEX.getPreferredName(),
DataFrameAnalyticsDest.RESULTS_FIELD.getPreferredName(),
DataFrameAnalyticsSource.INDEX.getPreferredName(),
DataFrameAnalyticsSource.QUERY.getPreferredName(),
DataFrameAnalyticsSource._SOURCE.getPreferredName(),
OutlierDetection.NAME.getPreferredName(),
OutlierDetection.N_NEIGHBORS.getPreferredName(),
OutlierDetection.METHOD.getPreferredName(),
OutlierDetection.FEATURE_INFLUENCE_THRESHOLD.getPreferredName(),
Regression.NAME.getPreferredName(),
Regression.DEPENDENT_VARIABLE.getPreferredName(),
Regression.LOSS_FUNCTION.getPreferredName(),
Regression.LOSS_FUNCTION_PARAMETER.getPreferredName(),
Regression.PREDICTION_FIELD_NAME.getPreferredName(),
Regression.TRAINING_PERCENT.getPreferredName(),
Regression.FEATURE_PROCESSORS.getPreferredName(),
Regression.EARLY_STOPPING_ENABLED.getPreferredName(),
Classification.NAME.getPreferredName(),
Classification.DEPENDENT_VARIABLE.getPreferredName(),
Classification.PREDICTION_FIELD_NAME.getPreferredName(),
Classification.CLASS_ASSIGNMENT_OBJECTIVE.getPreferredName(),
Classification.NUM_TOP_CLASSES.getPreferredName(),
Classification.TRAINING_PERCENT.getPreferredName(),
Classification.FEATURE_PROCESSORS.getPreferredName(),
Classification.EARLY_STOPPING_ENABLED.getPreferredName(),
BoostedTreeParams.ALPHA.getPreferredName(),
BoostedTreeParams.DOWNSAMPLE_FACTOR.getPreferredName(),
BoostedTreeParams.LAMBDA.getPreferredName(),
BoostedTreeParams.GAMMA.getPreferredName(),
BoostedTreeParams.ETA.getPreferredName(),
BoostedTreeParams.ETA_GROWTH_RATE_PER_TREE.getPreferredName(),
BoostedTreeParams.MAX_OPTIMIZATION_ROUNDS_PER_HYPERPARAMETER.getPreferredName(),
BoostedTreeParams.MAX_TREES.getPreferredName(),
BoostedTreeParams.FEATURE_BAG_FRACTION.getPreferredName(),
BoostedTreeParams.NUM_TOP_FEATURE_IMPORTANCE_VALUES.getPreferredName(),
BoostedTreeParams.SOFT_TREE_DEPTH_LIMIT.getPreferredName(),
BoostedTreeParams.SOFT_TREE_DEPTH_TOLERANCE.getPreferredName(),

ElasticsearchMappings.CONFIG_TYPE,

GetResult._ID,
GetResult._INDEX,
};

/**
Expand All @@ -379,11 +219,6 @@ public static boolean isValidFieldName(String fieldName) {
*/
public static final Set<String> RESERVED_RESULT_FIELD_NAMES = new HashSet<>(Arrays.asList(RESERVED_RESULT_FIELD_NAME_ARRAY));

/**
* A set of all reserved field names in our config.
*/
public static final Set<String> RESERVED_CONFIG_FIELD_NAMES = new HashSet<>(Arrays.asList(RESERVED_CONFIG_FIELD_NAME_ARRAY));

// Private, empty constructor: this class cannot be instantiated from outside.
private ReservedFieldNames() {
}
}
Loading

0 comments on commit 5f5968b

Please sign in to comment.