Skip to content

Commit

Permalink
[ML] Rename df-analytics _id_copy to ml__id_copy (#43754)
Browse files Browse the repository at this point in the history
Renames `_id_copy` to `ml__id_copy` as field names starting with
underscore are deprecated. The new field name `ml__id_copy` was
chosen as an obscure enough field that users won't have in their data.
Otherwise, this field is only intented to be used by df-analytics.
  • Loading branch information
dimitris-athanasiou authored Jun 29, 2019
1 parent e6444d3 commit 956b630
Show file tree
Hide file tree
Showing 7 changed files with 29 additions and 34 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,10 @@ public void testOutlierDetectionWithFewDocuments() throws Exception {
assertThat(destDoc.get(field), equalTo(sourceDoc.get(field)));
}
assertThat(destDoc.containsKey("ml"), is(true));

@SuppressWarnings("unchecked")
Map<String, Object> resultsObject = (Map<String, Object>) destDoc.get("ml");

assertThat(resultsObject.containsKey("outlier_score"), is(true));
double outlierScore = (double) resultsObject.get("outlier_score");
assertThat(outlierScore, allOf(greaterThanOrEqualTo(0.0), lessThanOrEqualTo(100.0)));
Expand Down Expand Up @@ -209,7 +212,10 @@ public void testOutlierDetectionWithMoreFieldsThanDocValueFieldLimit() throws Ex
assertThat(destDoc.get(field), equalTo(sourceDoc.get(field)));
}
assertThat(destDoc.containsKey("ml"), is(true));

@SuppressWarnings("unchecked")
Map<String, Object> resultsObject = (Map<String, Object>) destDoc.get("ml");

assertThat(resultsObject.containsKey("outlier_score"), is(true));
double outlierScore = (double) resultsObject.get("outlier_score");
assertThat(outlierScore, allOf(greaterThanOrEqualTo(0.0), lessThanOrEqualTo(100.0)));
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,16 @@
/**
* {@link DataFrameAnalyticsIndex} class encapsulates logic for creating destination index based on source index metadata.
*/
final class DataFrameAnalyticsIndex {
public final class DataFrameAnalyticsIndex {

public static final String ID_COPY = "ml__id_copy";

// Metadata fields
static final String CREATION_DATE_MILLIS = "creation_date_in_millis";
static final String VERSION = "version";
static final String CREATED = "created";
static final String CREATED_BY = "created_by";
static final String ANALYTICS = "analytics";

private static final String PROPERTIES = "properties";
private static final String META = "_meta";
Expand Down Expand Up @@ -121,7 +130,7 @@ private static Settings settings(GetSettingsResponse settingsResponse) {
Integer maxNumberOfReplicas = findMaxSettingValue(settingsResponse, IndexMetaData.SETTING_NUMBER_OF_REPLICAS);

Settings.Builder settingsBuilder = Settings.builder();
settingsBuilder.put(IndexSortConfig.INDEX_SORT_FIELD_SETTING.getKey(), DataFrameAnalyticsFields.ID);
settingsBuilder.put(IndexSortConfig.INDEX_SORT_FIELD_SETTING.getKey(), ID_COPY);
settingsBuilder.put(IndexSortConfig.INDEX_SORT_ORDER_SETTING.getKey(), SortOrder.ASC);
if (maxNumberOfShards != null) {
settingsBuilder.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, maxNumberOfShards);
Expand All @@ -148,15 +157,15 @@ private static Integer findMaxSettingValue(GetSettingsResponse settingsResponse,

private static void addProperties(Map<String, Object> mappingsAsMap) {
Map<String, Object> properties = getOrPutDefault(mappingsAsMap, PROPERTIES, HashMap::new);
properties.put(DataFrameAnalyticsFields.ID, Map.of("type", "keyword"));
properties.put(ID_COPY, Map.of("type", "keyword"));
}

private static void addMetaData(Map<String, Object> mappingsAsMap, String analyticsId, Clock clock) {
Map<String, Object> metadata = getOrPutDefault(mappingsAsMap, META, HashMap::new);
metadata.put(DataFrameAnalyticsFields.CREATION_DATE_MILLIS, clock.millis());
metadata.put(DataFrameAnalyticsFields.CREATED_BY, "data-frame-analytics");
metadata.put(DataFrameAnalyticsFields.VERSION, Map.of(DataFrameAnalyticsFields.CREATED, Version.CURRENT));
metadata.put(DataFrameAnalyticsFields.ANALYTICS, analyticsId);
metadata.put(CREATION_DATE_MILLIS, clock.millis());
metadata.put(CREATED_BY, "data-frame-analytics");
metadata.put(VERSION, Map.of(CREATED, Version.CURRENT));
metadata.put(ANALYTICS, analyticsId);
}

private static <K, V> V getOrPutDefault(Map<K, Object> map, K key, Supplier<V> valueSupplier) {
Expand All @@ -176,7 +185,7 @@ public static void updateMappingsToDestIndex(Client client, DataFrameAnalyticsCo
ImmutableOpenMap<String, MappingMetaData> mappings = getIndexResponse.getMappings().get(getIndexResponse.indices()[0]);
String type = mappings.keysIt().next();

Map<String, Object> addedMappings = Map.of(PROPERTIES, Map.of(DataFrameAnalyticsFields.ID, Map.of("type", "keyword")));
Map<String, Object> addedMappings = Map.of(PROPERTIES, Map.of(ID_COPY, Map.of("type", "keyword")));

PutMappingRequest putMappingRequest = new PutMappingRequest(getIndexResponse.indices());
putMappingRequest.type(type);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ private void reindexDataframeAndStartAnalysis(DataFrameAnalyticsTask task, DataF
reindexRequest.setSourceIndices(config.getSource().getIndex());
reindexRequest.setSourceQuery(config.getSource().getParsedQuery());
reindexRequest.setDestIndex(config.getDest().getIndex());
reindexRequest.setScript(new Script("ctx._source." + DataFrameAnalyticsFields.ID + " = ctx._id"));
reindexRequest.setScript(new Script("ctx._source." + DataFrameAnalyticsIndex.ID_COPY + " = ctx._id"));

final ThreadContext threadContext = client.threadPool().getThreadContext();
final Supplier<ThreadContext.StoredContext> supplier = threadContext.newRestorableContext(false);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import org.elasticsearch.search.sort.SortOrder;
import org.elasticsearch.xpack.core.ClientHelper;
import org.elasticsearch.xpack.ml.datafeed.extractor.fields.ExtractedField;
import org.elasticsearch.xpack.ml.dataframe.DataFrameAnalyticsFields;
import org.elasticsearch.xpack.ml.dataframe.DataFrameAnalyticsIndex;

import java.io.IOException;
import java.util.ArrayList;
Expand Down Expand Up @@ -126,7 +126,7 @@ private SearchRequestBuilder buildSearchRequest() {
.setScroll(SCROLL_TIMEOUT)
// This ensures the search throws if there are failures and the scroll context gets cleared automatically
.setAllowPartialSearchResults(false)
.addSort(DataFrameAnalyticsFields.ID, SortOrder.ASC)
.addSort(DataFrameAnalyticsIndex.ID_COPY, SortOrder.ASC)
.setIndices(context.indices)
.setSize(context.scrollSize)
.setQuery(context.query);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -159,12 +159,12 @@ public void testCreateDestinationIndex() throws IOException {
containsInAnyOrder("index.number_of_shards", "index.number_of_replicas", "index.sort.field", "index.sort.order"));
assertThat(createIndexRequest.settings().getAsInt("index.number_of_shards", -1), equalTo(5));
assertThat(createIndexRequest.settings().getAsInt("index.number_of_replicas", -1), equalTo(1));
assertThat(createIndexRequest.settings().get("index.sort.field"), equalTo("_id_copy"));
assertThat(createIndexRequest.settings().get("index.sort.field"), equalTo("ml__id_copy"));
assertThat(createIndexRequest.settings().get("index.sort.order"), equalTo("asc"));

try (XContentParser parser = createParser(JsonXContent.jsonXContent, createIndexRequest.mappings().get("_doc"))) {
Map<String, Object> map = parser.map();
assertThat(extractValue("_doc.properties._id_copy.type", map), equalTo("keyword"));
assertThat(extractValue("_doc.properties.ml__id_copy.type", map), equalTo("keyword"));
assertThat(extractValue("_doc.properties.field_1", map), equalTo("field_1_mappings"));
assertThat(extractValue("_doc.properties.field_2", map), equalTo("field_2_mappings"));
assertThat(extractValue("_doc._meta.analytics", map), equalTo(ANALYTICS_ID));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ public void testTwoPageExtraction() throws IOException {
assertThat(searchRequest, containsString("\"query\":{\"match_all\":{\"boost\":1.0}}"));
assertThat(searchRequest, containsString("\"docvalue_fields\":[{\"field\":\"field_1\"},{\"field\":\"field_2\"}]"));
assertThat(searchRequest, containsString("\"_source\":{\"includes\":[],\"excludes\":[]}"));
assertThat(searchRequest, containsString("\"sort\":[{\"_id_copy\":{\"order\":\"asc\"}}]"));
assertThat(searchRequest, containsString("\"sort\":[{\"ml__id_copy\":{\"order\":\"asc\"}}]"));

// Check continue scroll requests had correct ids
assertThat(dataExtractor.capturedContinueScrollIds.size(), equalTo(2));
Expand Down

0 comments on commit 956b630

Please sign in to comment.