Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ML] Automatic management for ML system indices #68044

Merged
merged 11 commits into from
Feb 5, 2021
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ static String checkForSystemIndexViolations(SystemIndices systemIndices, Index[]
return "Cannot update mappings in "
+ violations
+ ": system indices can only use mappings from their descriptors, "
+ "but the mappings in the request did not match those in the descriptors(s)";
+ "but the mappings in the request [" + requestMappings + "] did not match those in the descriptor(s)";
}

return null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ public static class Builder {
private String indexPattern;
private String primaryIndex;
private String description;
private XContentBuilder mappingsBuilder = null;
private String mappings = null;
private Settings settings = null;
private String aliasName = null;
private int indexFormat = 0;
Expand All @@ -291,7 +291,12 @@ public Builder setDescription(String description) {
}

public Builder setMappings(XContentBuilder mappingsBuilder) {
this.mappingsBuilder = mappingsBuilder;
mappings = mappingsBuilder == null ? null : Strings.toString(mappingsBuilder);
return this;
}

/**
 * Sets the mappings from an already-serialized string, stored as given (no validation is done here).
 *
 * @param mappings the mappings to store on this builder; may be {@code null}
 * @return this builder, to allow call chaining
 */
public Builder setMappings(String mappings) {
this.mappings = mappings;
return this;
}

Expand Down Expand Up @@ -330,7 +335,6 @@ public Builder setMinimumNodeVersion(Version version) {
* @return a populated descriptor.
*/
public SystemIndexDescriptor build() {
String mappings = mappingsBuilder == null ? null : Strings.toString(mappingsBuilder);

return new SystemIndexDescriptor(
indexPattern,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,18 @@
package org.elasticsearch.xpack.core.ml;

import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.xpack.core.template.TemplateUtils;

public final class MlConfigIndex {

private static final String INDEX_NAME = ".ml-config";
private static final String MAPPINGS_VERSION_VARIABLE = "xpack.ml.version";

public static final int CONFIG_INDEX_MAX_RESULTS_WINDOW = 10_000;

/**
* The name of the index where job, datafeed and analytics configuration is stored
*
Expand All @@ -30,5 +35,13 @@ public static String mapping() {
MAPPINGS_VERSION_VARIABLE);
}

/**
 * Builds the index settings used by this class: a single shard, replicas
 * auto-expanding over the range "0-1", and a maximum result window of
 * {@link #CONFIG_INDEX_MAX_RESULTS_WINDOW}.
 *
 * @return a freshly built {@link Settings} instance
 */
public static Settings settings() {
    final Settings.Builder builder = Settings.builder();
    builder.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1);
    builder.put(IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS, "0-1");
    builder.put(IndexSettings.MAX_RESULT_WINDOW_SETTING.getKey(), CONFIG_INDEX_MAX_RESULTS_WINDOW);
    return builder.build();
}

// Private so that this class cannot be instantiated from outside.
private MlConfigIndex() {}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,15 @@
*/
package org.elasticsearch.xpack.core.ml;

import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.xpack.core.template.TemplateUtils;

public final class MlMetaIndex {

private static final String INDEX_NAME = ".ml-meta";
private static final String MAPPINGS_VERSION_VARIABLE = "xpack.ml.version";

/**
* Where to store the ml info in Elasticsearch - must match what's
Expand All @@ -20,5 +26,19 @@ public static String indexName() {
return INDEX_NAME;
}

/**
 * Loads the meta index mappings by passing the bundled JSON resource, the
 * current version string and the version variable name to
 * {@code TemplateUtils.loadTemplate}.
 * NOTE(review): presumably the version variable is substituted into the
 * template text; confirm against {@code TemplateUtils}.
 *
 * @return the string returned by {@code TemplateUtils.loadTemplate}
 */
public static String mapping() {
    final String resourcePath = "/org/elasticsearch/xpack/core/ml/meta_index_mappings.json";
    final String currentVersion = Version.CURRENT.toString();
    return TemplateUtils.loadTemplate(resourcePath, currentVersion, MAPPINGS_VERSION_VARIABLE);
}

/**
 * Builds the index settings used by this class: a single shard and
 * replicas auto-expanding over the range "0-1".
 *
 * @return a freshly built {@link Settings} instance
 */
public static Settings settings() {
    final Settings.Builder builder = Settings.builder();
    builder.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1);
    builder.put(IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS, "0-1");
    return builder.build();
}

// Private so that this class cannot be instantiated from outside.
private MlMetaIndex() {}
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,11 @@
*/
package org.elasticsearch.xpack.core.ml.inference.persistence;

import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.xpack.core.template.TemplateUtils;

/**
* Class containing the index constants so that the index version, name, and prefix are available to a wider audience.
Expand All @@ -29,5 +33,21 @@ public final class InferenceIndexConstants {
public static final String LATEST_INDEX_NAME = INDEX_NAME_PREFIX + INDEX_VERSION;
public static final ParseField DOC_TYPE = new ParseField("doc_type");

private static final String MAPPINGS_VERSION_VARIABLE = "xpack.ml.version";

/**
 * Loads the inference index mappings by passing the bundled JSON resource,
 * the current version string and the version variable name to
 * {@code TemplateUtils.loadTemplate}.
 * NOTE(review): presumably the version variable is substituted into the
 * template text; confirm against {@code TemplateUtils}.
 *
 * @return the string returned by {@code TemplateUtils.loadTemplate}
 */
public static String mapping() {
    final String resourcePath = "/org/elasticsearch/xpack/core/ml/inference_index_mappings.json";
    final String currentVersion = Version.CURRENT.toString();
    return TemplateUtils.loadTemplate(resourcePath, currentVersion, MAPPINGS_VERSION_VARIABLE);
}

/**
 * Builds the index settings used by this class: a single shard and
 * replicas auto-expanding over the range "0-1".
 *
 * @return a freshly built {@link Settings} instance
 */
public static Settings settings() {
    final Settings.Builder builder = Settings.builder();
    builder.put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1);
    builder.put(IndexMetadata.SETTING_AUTO_EXPAND_REPLICAS, "0-1");
    return builder.build();
}

// Private so that this constants holder cannot be instantiated from outside.
private InferenceIndexConstants() {}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@
*/
public final class AnomalyDetectorsIndex {

public static final int CONFIG_INDEX_MAX_RESULTS_WINDOW = 10_000;

private static final String RESULTS_MAPPINGS_VERSION_VARIABLE = "xpack.ml.version";
private static final String RESOURCE_PATH = "/org/elasticsearch/xpack/core/ml/anomalydetection/";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -163,6 +163,7 @@ public static void addDocMappingIfMissing(String alias,
String mapping = mappingSupplier.get();
PutMappingRequest putMappingRequest = new PutMappingRequest(indicesThatRequireAnUpdate);
putMappingRequest.source(mapping, XContentType.JSON);
putMappingRequest.origin(ML_ORIGIN);
executeAsyncWithOrigin(client, ML_ORIGIN, PutMappingAction.INSTANCE, putMappingRequest,
ActionListener.wrap(response -> {
if (response.isAcknowledged()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,27 +7,9 @@
package org.elasticsearch.xpack.core.ml.job.results;

import org.elasticsearch.index.get.GetResult;
import org.elasticsearch.xpack.core.ml.datafeed.ChunkingConfig;
import org.elasticsearch.xpack.core.ml.datafeed.DatafeedConfig;
import org.elasticsearch.xpack.core.ml.datafeed.DatafeedTimingStats;
import org.elasticsearch.xpack.core.ml.datafeed.DelayedDataCheckConfig;
import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig;
import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsDest;
import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsSource;
import org.elasticsearch.xpack.core.ml.dataframe.analyses.BoostedTreeParams;
import org.elasticsearch.xpack.core.ml.dataframe.analyses.Classification;
import org.elasticsearch.xpack.core.ml.dataframe.analyses.OutlierDetection;
import org.elasticsearch.xpack.core.ml.dataframe.analyses.Regression;
import org.elasticsearch.xpack.core.ml.job.config.AnalysisConfig;
import org.elasticsearch.xpack.core.ml.job.config.AnalysisLimits;
import org.elasticsearch.xpack.core.ml.job.config.DataDescription;
import org.elasticsearch.xpack.core.ml.job.config.DetectionRule;
import org.elasticsearch.xpack.core.ml.job.config.Detector;
import org.elasticsearch.xpack.core.ml.job.config.Job;
import org.elasticsearch.xpack.core.ml.job.config.ModelPlotConfig;
import org.elasticsearch.xpack.core.ml.job.config.Operator;
import org.elasticsearch.xpack.core.ml.job.config.PerPartitionCategorizationConfig;
import org.elasticsearch.xpack.core.ml.job.config.RuleCondition;
import org.elasticsearch.xpack.core.ml.job.persistence.ElasticsearchMappings;
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.DataCounts;
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.ModelSizeStats;
Expand Down Expand Up @@ -214,148 +196,6 @@ public final class ReservedFieldNames {

GetResult._ID,
GetResult._INDEX
};

/**
* This array should be updated to contain all the field names that appear
* in any documents we store in our config index.
*/
private static final String[] RESERVED_CONFIG_FIELD_NAME_ARRAY = {
Job.ID.getPreferredName(),
Job.JOB_TYPE.getPreferredName(),
Job.JOB_VERSION.getPreferredName(),
Job.GROUPS.getPreferredName(),
Job.ANALYSIS_CONFIG.getPreferredName(),
Job.ANALYSIS_LIMITS.getPreferredName(),
Job.CREATE_TIME.getPreferredName(),
Job.CUSTOM_SETTINGS.getPreferredName(),
Job.DATA_DESCRIPTION.getPreferredName(),
Job.DESCRIPTION.getPreferredName(),
Job.FINISHED_TIME.getPreferredName(),
Job.MODEL_PLOT_CONFIG.getPreferredName(),
Job.RENORMALIZATION_WINDOW_DAYS.getPreferredName(),
Job.BACKGROUND_PERSIST_INTERVAL.getPreferredName(),
Job.MODEL_SNAPSHOT_RETENTION_DAYS.getPreferredName(),
Job.DAILY_MODEL_SNAPSHOT_RETENTION_AFTER_DAYS.getPreferredName(),
Job.RESULTS_RETENTION_DAYS.getPreferredName(),
Job.MODEL_SNAPSHOT_ID.getPreferredName(),
Job.MODEL_SNAPSHOT_MIN_VERSION.getPreferredName(),
Job.RESULTS_INDEX_NAME.getPreferredName(),
Job.ALLOW_LAZY_OPEN.getPreferredName(),

AnalysisConfig.BUCKET_SPAN.getPreferredName(),
AnalysisConfig.CATEGORIZATION_FIELD_NAME.getPreferredName(),
AnalysisConfig.CATEGORIZATION_FILTERS.getPreferredName(),
AnalysisConfig.CATEGORIZATION_ANALYZER.getPreferredName(),
AnalysisConfig.PER_PARTITION_CATEGORIZATION.getPreferredName(),
AnalysisConfig.LATENCY.getPreferredName(),
AnalysisConfig.SUMMARY_COUNT_FIELD_NAME.getPreferredName(),
AnalysisConfig.DETECTORS.getPreferredName(),
AnalysisConfig.INFLUENCERS.getPreferredName(),
AnalysisConfig.MULTIVARIATE_BY_FIELDS.getPreferredName(),

AnalysisLimits.MODEL_MEMORY_LIMIT.getPreferredName(),
AnalysisLimits.CATEGORIZATION_EXAMPLES_LIMIT.getPreferredName(),

Detector.DETECTOR_DESCRIPTION_FIELD.getPreferredName(),
Detector.FUNCTION_FIELD.getPreferredName(),
Detector.FIELD_NAME_FIELD.getPreferredName(),
Detector.BY_FIELD_NAME_FIELD.getPreferredName(),
Detector.OVER_FIELD_NAME_FIELD.getPreferredName(),
Detector.PARTITION_FIELD_NAME_FIELD.getPreferredName(),
Detector.USE_NULL_FIELD.getPreferredName(),
Detector.EXCLUDE_FREQUENT_FIELD.getPreferredName(),
Detector.CUSTOM_RULES_FIELD.getPreferredName(),
Detector.DETECTOR_INDEX.getPreferredName(),

DetectionRule.ACTIONS_FIELD.getPreferredName(),
DetectionRule.CONDITIONS_FIELD.getPreferredName(),
DetectionRule.SCOPE_FIELD.getPreferredName(),
RuleCondition.APPLIES_TO_FIELD.getPreferredName(),
RuleCondition.VALUE_FIELD.getPreferredName(),
Operator.OPERATOR_FIELD.getPreferredName(),

DataDescription.FORMAT_FIELD.getPreferredName(),
DataDescription.TIME_FIELD_NAME_FIELD.getPreferredName(),
DataDescription.TIME_FORMAT_FIELD.getPreferredName(),
DataDescription.FIELD_DELIMITER_FIELD.getPreferredName(),
DataDescription.QUOTE_CHARACTER_FIELD.getPreferredName(),

ModelPlotConfig.ENABLED_FIELD.getPreferredName(),
ModelPlotConfig.TERMS_FIELD.getPreferredName(),
ModelPlotConfig.ANNOTATIONS_ENABLED_FIELD.getPreferredName(),

PerPartitionCategorizationConfig.STOP_ON_WARN.getPreferredName(),

DatafeedConfig.ID.getPreferredName(),
DatafeedConfig.QUERY_DELAY.getPreferredName(),
DatafeedConfig.FREQUENCY.getPreferredName(),
DatafeedConfig.INDICES.getPreferredName(),
DatafeedConfig.QUERY.getPreferredName(),
DatafeedConfig.SCROLL_SIZE.getPreferredName(),
DatafeedConfig.AGGREGATIONS.getPreferredName(),
DatafeedConfig.SCRIPT_FIELDS.getPreferredName(),
DatafeedConfig.CHUNKING_CONFIG.getPreferredName(),
DatafeedConfig.HEADERS.getPreferredName(),
DatafeedConfig.DELAYED_DATA_CHECK_CONFIG.getPreferredName(),
DatafeedConfig.INDICES_OPTIONS.getPreferredName(),
DelayedDataCheckConfig.ENABLED.getPreferredName(),
DelayedDataCheckConfig.CHECK_WINDOW.getPreferredName(),

ChunkingConfig.MODE_FIELD.getPreferredName(),
ChunkingConfig.TIME_SPAN_FIELD.getPreferredName(),

DataFrameAnalyticsConfig.ID.getPreferredName(),
DataFrameAnalyticsConfig.DESCRIPTION.getPreferredName(),
DataFrameAnalyticsConfig.SOURCE.getPreferredName(),
DataFrameAnalyticsConfig.DEST.getPreferredName(),
DataFrameAnalyticsConfig.ANALYSIS.getPreferredName(),
DataFrameAnalyticsConfig.ANALYZED_FIELDS.getPreferredName(),
DataFrameAnalyticsConfig.CREATE_TIME.getPreferredName(),
DataFrameAnalyticsConfig.VERSION.getPreferredName(),
DataFrameAnalyticsConfig.MAX_NUM_THREADS.getPreferredName(),
DataFrameAnalyticsDest.INDEX.getPreferredName(),
DataFrameAnalyticsDest.RESULTS_FIELD.getPreferredName(),
DataFrameAnalyticsSource.INDEX.getPreferredName(),
DataFrameAnalyticsSource.QUERY.getPreferredName(),
DataFrameAnalyticsSource._SOURCE.getPreferredName(),
OutlierDetection.NAME.getPreferredName(),
OutlierDetection.N_NEIGHBORS.getPreferredName(),
OutlierDetection.METHOD.getPreferredName(),
OutlierDetection.FEATURE_INFLUENCE_THRESHOLD.getPreferredName(),
Regression.NAME.getPreferredName(),
Regression.DEPENDENT_VARIABLE.getPreferredName(),
Regression.LOSS_FUNCTION.getPreferredName(),
Regression.LOSS_FUNCTION_PARAMETER.getPreferredName(),
Regression.PREDICTION_FIELD_NAME.getPreferredName(),
Regression.TRAINING_PERCENT.getPreferredName(),
Regression.FEATURE_PROCESSORS.getPreferredName(),
Regression.EARLY_STOPPING_ENABLED.getPreferredName(),
Classification.NAME.getPreferredName(),
Classification.DEPENDENT_VARIABLE.getPreferredName(),
Classification.PREDICTION_FIELD_NAME.getPreferredName(),
Classification.CLASS_ASSIGNMENT_OBJECTIVE.getPreferredName(),
Classification.NUM_TOP_CLASSES.getPreferredName(),
Classification.TRAINING_PERCENT.getPreferredName(),
Classification.FEATURE_PROCESSORS.getPreferredName(),
Classification.EARLY_STOPPING_ENABLED.getPreferredName(),
BoostedTreeParams.ALPHA.getPreferredName(),
BoostedTreeParams.DOWNSAMPLE_FACTOR.getPreferredName(),
BoostedTreeParams.LAMBDA.getPreferredName(),
BoostedTreeParams.GAMMA.getPreferredName(),
BoostedTreeParams.ETA.getPreferredName(),
BoostedTreeParams.ETA_GROWTH_RATE_PER_TREE.getPreferredName(),
BoostedTreeParams.MAX_OPTIMIZATION_ROUNDS_PER_HYPERPARAMETER.getPreferredName(),
BoostedTreeParams.MAX_TREES.getPreferredName(),
BoostedTreeParams.FEATURE_BAG_FRACTION.getPreferredName(),
BoostedTreeParams.NUM_TOP_FEATURE_IMPORTANCE_VALUES.getPreferredName(),
BoostedTreeParams.SOFT_TREE_DEPTH_LIMIT.getPreferredName(),
BoostedTreeParams.SOFT_TREE_DEPTH_TOLERANCE.getPreferredName(),

ElasticsearchMappings.CONFIG_TYPE,

GetResult._ID,
GetResult._INDEX,
};

/**
Expand All @@ -379,11 +219,6 @@ public static boolean isValidFieldName(String fieldName) {
*/
public static final Set<String> RESERVED_RESULT_FIELD_NAMES = new HashSet<>(Arrays.asList(RESERVED_RESULT_FIELD_NAME_ARRAY));

/**
* A set of all reserved field names in our config.
*/
public static final Set<String> RESERVED_CONFIG_FIELD_NAMES = new HashSet<>(Arrays.asList(RESERVED_CONFIG_FIELD_NAME_ARRAY));

// Private so that this class cannot be instantiated from outside.
private ReservedFieldNames() {
}
}
Loading