Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[7.x] Throw an exception when memory usage estimation endpoint encounters empty data frame. (#49143) #49164

Merged
merged 1 commit into from
Nov 18, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions x-pack/plugin/ml/qa/ml-with-security/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ integTest.runner {
'ml/data_frame_analytics_crud/Test put classification given num_top_classes is greater than 1k',
'ml/data_frame_analytics_crud/Test put classification given training_percent is less than one',
'ml/data_frame_analytics_crud/Test put classification given training_percent is greater than hundred',
'ml/data_frame_analytics_memory_usage_estimation/Test memory usage estimation for empty data frame',
'ml/evaluate_data_frame/Test given missing index',
'ml/evaluate_data_frame/Test given index does not exist',
'ml/evaluate_data_frame/Test given missing evaluation',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -238,11 +238,13 @@ private void getStartContext(String id, ActionListener<StartContext> finalListen
.collectDataSummaryAsync(ActionListener.wrap(
dataSummary -> {
if (dataSummary.rows == 0) {
finalListener.onFailure(new ElasticsearchStatusException(
"Unable to start {} as there are no analyzable data in source indices [{}].",
RestStatus.BAD_REQUEST,
id,
Strings.arrayToCommaDelimitedString(startContext.config.getSource().getIndex())
finalListener.onFailure(ExceptionsHelper.badRequestException(
"Unable to start {} as no documents in the source indices [{}] contained all the fields "
+ "selected for analysis. If you are relying on automatic field selection then there are "
+ "currently mapped fields that do not exist in any indexed documents, and you will have "
+ "to switch to explicit field selection and include only fields that exist in indexed "
+ "documents.",
id, Strings.arrayToCommaDelimitedString(startContext.config.getSource().getIndex())
));
} else {
finalListener.onResponse(startContext);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.message.ParameterizedMessage;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.unit.ByteSizeUnit;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.xpack.core.ml.dataframe.DataFrameAnalyticsConfig;
Expand Down Expand Up @@ -57,10 +58,16 @@ private MemoryUsageEstimationResult runJob(String jobId,
DataFrameDataExtractorFactory dataExtractorFactory) {
DataFrameDataExtractor dataExtractor = dataExtractorFactory.newExtractor(false);
DataFrameDataExtractor.DataSummary dataSummary = dataExtractor.collectDataSummary();
Set<String> categoricalFields = dataExtractor.getCategoricalFields(config.getAnalysis());
if (dataSummary.rows == 0) {
return new MemoryUsageEstimationResult(ByteSizeValue.ZERO, ByteSizeValue.ZERO);
throw ExceptionsHelper.badRequestException(
"[{}] Unable to estimate memory usage as no documents in the source indices [{}] contained all the fields selected for "
+ "analysis. If you are relying on automatic field selection then there are currently mapped fields that do not exist "
+ "in any indexed documents, and you will have to switch to explicit field selection and include only fields that "
+ "exist in indexed documents.",
jobId,
Strings.arrayToCommaDelimitedString(config.getSource().getIndex()));
}
Set<String> categoricalFields = dataExtractor.getCategoricalFields(config.getAnalysis());
AnalyticsProcessConfig processConfig =
new AnalyticsProcessConfig(
jobId,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,6 @@ public class MemoryUsageEstimationProcessManagerTests extends ESTestCase {
private static final String CONFIG_ID = "dummy";
private static final int NUM_ROWS = 100;
private static final int NUM_COLS = 4;
private static final MemoryUsageEstimationResult PROCESS_RESULT_ZERO =
new MemoryUsageEstimationResult(ByteSizeValue.ZERO, ByteSizeValue.ZERO);
private static final MemoryUsageEstimationResult PROCESS_RESULT =
new MemoryUsageEstimationResult(ByteSizeValue.parseBytesSizeValue("20kB", ""), ByteSizeValue.parseBytesSizeValue("10kB", ""));

Expand Down Expand Up @@ -85,9 +83,11 @@ public void testRunJob_EmptyDataFrame() {

processManager.runJobAsync(TASK_ID, dataFrameAnalyticsConfig, dataExtractorFactory, listener);

verify(listener).onResponse(resultCaptor.capture());
MemoryUsageEstimationResult result = resultCaptor.getValue();
assertThat(result, equalTo(PROCESS_RESULT_ZERO));
verify(listener).onFailure(exceptionCaptor.capture());
ElasticsearchException exception = (ElasticsearchException) exceptionCaptor.getValue();
assertThat(exception.status(), equalTo(RestStatus.BAD_REQUEST));
assertThat(exception.getMessage(), containsString(TASK_ID));
assertThat(exception.getMessage(), containsString("Unable to estimate memory usage"));

verifyNoMoreInteractions(process, listener);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,27 @@ setup:
---
"Test memory usage estimation for empty data frame":
- do:
catch: /Unable to estimate memory usage as no documents in the source indices \[index-source\] contained all the fields selected for analysis/
ml.estimate_memory_usage:
body:
source: { index: "index-source" }
analysis: { outlier_detection: {} }

- do:
index:
index: index-source
refresh: true
body: { x: 1 }
- match: { result: "created" }

# Note that the value for "y" is missing and outlier detection analysis does not support missing values.
# Hence, the data frame is still considered empty.
- do:
catch: /Unable to estimate memory usage as no documents in the source indices \[index-source\] contained all the fields selected for analysis/
ml.estimate_memory_usage:
body:
source: { index: "index-source" }
analysis: { outlier_detection: {} }
- match: { expected_memory_without_disk: "0" }
- match: { expected_memory_with_disk: "0" }

---
"Test memory usage estimation for non-empty data frame":
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@
}

- do:
catch: /Unable to start empty-with-compatible-fields as there are no analyzable data in source indices \[empty-index-with-compatible-fields\]/
catch: /Unable to start empty-with-compatible-fields as no documents in the source indices \[empty-index-with-compatible-fields\] contained all the fields selected for analysis/
ml.start_data_frame_analytics:
id: "empty-with-compatible-fields"
---
Expand Down