Skip to content

Commit

Permalink
[ML] Do not run data cleanup on the client thread (#31691)
Browse files Browse the repository at this point in the history
The ML expired data cleanup executes blocking calls. These calls need to
be forked off the network thread or they can end up blocking all
networking threads waiting for these blocking calls to finish. This
commit moves the calls off the networking thread by forking to the ML
utility thread pool.
  • Loading branch information
davidkyle authored and jasontedor committed Jun 29, 2018
1 parent d554df7 commit eb782d0
Show file tree
Hide file tree
Showing 5 changed files with 604 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.TransportService;
import org.elasticsearch.transport.Transports;
import org.elasticsearch.xpack.core.ClientHelper;
import org.elasticsearch.xpack.core.ml.action.DeleteExpiredDataAction;
import org.elasticsearch.xpack.ml.MachineLearning;
Expand Down Expand Up @@ -66,10 +67,16 @@ private void deleteExpiredData(ActionListener<DeleteExpiredDataAction.Response>

private void deleteExpiredData(Iterator<MlDataRemover> mlDataRemoversIterator,
ActionListener<DeleteExpiredDataAction.Response> listener) {
// Removing expired ML data and artifacts requires multiple operations.
// These are queued up and executed sequentially in the action listener,
// the chained calls must all run the ML utility thread pool NOT the thread
// the previous action returned in which in the case of a transport_client_boss
// thread is a disaster.
if (mlDataRemoversIterator.hasNext()) {
MlDataRemover remover = mlDataRemoversIterator.next();
remover.remove(ActionListener.wrap(
booleanResponse -> deleteExpiredData(mlDataRemoversIterator, listener),
booleanResponse -> threadPool.executor(MachineLearning.UTILITY_THREAD_POOL_NAME).execute(() ->
deleteExpiredData(mlDataRemoversIterator, listener)),
listener::onFailure));
} else {
logger.info("Completed deletion of expired data");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.elasticsearch.search.sort.SortBuilders;
import org.elasticsearch.transport.Transports;
import org.elasticsearch.xpack.core.ml.job.persistence.ElasticsearchMappings;
import org.elasticsearch.xpack.core.ml.utils.MlIndicesUtils;

Expand Down
85 changes: 85 additions & 0 deletions x-pack/qa/ml-native-multi-node-tests/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
import org.elasticsearch.gradle.LoggedExec

apply plugin: 'elasticsearch.standalone-rest-test'
apply plugin: 'elasticsearch.rest-test'

dependencies {
testCompile project(path: xpackModule('core'), configuration: 'runtime')
testCompile project(path: xpackModule('core'), configuration: 'testArtifacts')
testCompile project(path: xpackModule('ml'), configuration: 'runtime')
testCompile project(path: xpackModule('ml'), configuration: 'testArtifacts')
}

integTestRunner {
/*
* We have to disable setting the number of available processors as tests in the same JVM randomize processors and will step on each
* other if we allow them to set the number of available processors as it's set-once in Netty.
*/
systemProperty 'es.set.netty.runtime.available.processors', 'false'
}

// location of generated keystores and certificates
File keystoreDir = new File(project.buildDir, 'keystore')

// Generate the node's keystore
File nodeKeystore = new File(keystoreDir, 'test-node.jks')
task createNodeKeyStore(type: LoggedExec) {
doFirst {
if (nodeKeystore.parentFile.exists() == false) {
nodeKeystore.parentFile.mkdirs()
}
if (nodeKeystore.exists()) {
delete nodeKeystore
}
}
executable = new File(project.runtimeJavaHome, 'bin/keytool')
standardInput = new ByteArrayInputStream('FirstName LastName\nUnit\nOrganization\nCity\nState\nNL\nyes\n\n'.getBytes('UTF-8'))
args '-genkey',
'-alias', 'test-node',
'-keystore', nodeKeystore,
'-keyalg', 'RSA',
'-keysize', '2048',
'-validity', '712',
'-dname', 'CN=smoke-test-plugins-ssl',
'-keypass', 'keypass',
'-storepass', 'keypass'
}

// Add keystores to test classpath: it expects it there
sourceSets.test.resources.srcDir(keystoreDir)
processTestResources.dependsOn(createNodeKeyStore)

integTestCluster {
dependsOn createNodeKeyStore
setting 'xpack.security.enabled', 'true'
setting 'xpack.ml.enabled', 'true'
setting 'logger.org.elasticsearch.xpack.ml.datafeed', 'TRACE'
setting 'xpack.monitoring.enabled', 'false'
setting 'xpack.security.authc.token.enabled', 'true'
setting 'xpack.security.transport.ssl.enabled', 'true'
setting 'xpack.security.transport.ssl.keystore.path', nodeKeystore.name
setting 'xpack.security.transport.ssl.verification_mode', 'certificate'
setting 'xpack.security.audit.enabled', 'true'
setting 'xpack.license.self_generated.type', 'trial'

keystoreSetting 'bootstrap.password', 'x-pack-test-password'
keystoreSetting 'xpack.security.transport.ssl.keystore.secure_password', 'keypass'

numNodes = 3

setupCommand 'setupDummyUser',
'bin/elasticsearch-users', 'useradd', 'x_pack_rest_user', '-p', 'x-pack-test-password', '-r', 'superuser'

extraConfigFile nodeKeystore.name, nodeKeystore

waitCondition = { node, ant ->
File tmpFile = new File(node.cwd, 'wait.success')
ant.get(src: "http://${node.httpUri()}/_cluster/health?wait_for_nodes=>=${numNodes}&wait_for_status=yellow",
dest: tmpFile.toString(),
username: 'x_pack_rest_user',
password: 'x-pack-test-password',
ignoreerrors: true,
retries: 10)
return tmpFile.exists()
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;

import static org.hamcrest.Matchers.equalTo;
Expand Down
Loading

0 comments on commit eb782d0

Please sign in to comment.