Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Backport to 1.3] restart bug fix #496

Merged
merged 3 commits into from
Apr 22, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 20 additions & 5 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ plugins {
id 'nebula.ospackage' version "8.3.0" apply false
id "com.diffplug.gradle.spotless" version "3.26.1"
id 'java-library'
id 'org.gradle.test-retry' version '1.3.1'
}

tasks.withType(JavaCompile) {
Expand Down Expand Up @@ -103,7 +104,7 @@ configurations.all {
if (it.state != Configuration.State.UNRESOLVED) return
resolutionStrategy {
force "joda-time:joda-time:${versions.joda}"
force "com.fasterxml.jackson.core:jackson-core:2.12.6"
force "com.fasterxml.jackson.core:jackson-core:2.13.2"
force "commons-logging:commons-logging:${versions.commonslogging}"
force "org.apache.httpcomponents:httpcore:${versions.httpcore}"
force "commons-codec:commons-codec:${versions.commonscodec}"
Expand Down Expand Up @@ -150,8 +151,15 @@ def _numNodes = findProperty('numNodes') as Integer ?: 1

def opensearch_tmp_dir = rootProject.file('build/private/opensearch_tmp').absoluteFile
opensearch_tmp_dir.mkdirs()

boolean isCiServer = System.getenv().containsKey("CI")
test {
retry {
if (isCiServer) {
failOnPassedAfterRetry = false
maxRetries = 6
maxFailures = 10
}
}
include '**/*Tests.class'
systemProperty 'tests.security.manager', 'false'
}
Expand All @@ -164,6 +172,13 @@ task integTest(type: RestIntegTestTask) {
tasks.named("check").configure { dependsOn(integTest) }

integTest {
retry {
if (isCiServer) {
failOnPassedAfterRetry = false
maxRetries = 6
maxFailures = 10
}
}
dependsOn "bundlePlugin"
systemProperty 'tests.security.manager', 'false'
systemProperty 'java.io.tmpdir', opensearch_tmp_dir.absolutePath
Expand Down Expand Up @@ -589,9 +604,9 @@ dependencies {

// force Jackson version to avoid version conflict issue
implementation 'software.amazon.randomcutforest:randomcutforest-serialization:2.0.1'
implementation "com.fasterxml.jackson.core:jackson-core:2.12.6"
implementation "com.fasterxml.jackson.core:jackson-databind:2.12.6"
implementation "com.fasterxml.jackson.core:jackson-annotations:2.12.6"
implementation "com.fasterxml.jackson.core:jackson-core:2.13.2"
implementation "com.fasterxml.jackson.core:jackson-databind:2.13.2.2"
implementation "com.fasterxml.jackson.core:jackson-annotations:2.13.2"
compile files('lib/randomcutforest-parkservices-2.0.1.jar')
compile files('lib/randomcutforest-core-2.0.1.jar')

Expand Down
8 changes: 7 additions & 1 deletion src/main/java/org/opensearch/ad/ml/EntityColdStarter.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import org.opensearch.action.ActionListener;
import org.opensearch.action.support.ThreadedActionListener;
import org.opensearch.ad.AnomalyDetectorPlugin;
import org.opensearch.ad.CleanState;
import org.opensearch.ad.MaintenanceState;
import org.opensearch.ad.NodeStateManager;
import org.opensearch.ad.caching.DoorKeeper;
Expand All @@ -63,7 +64,7 @@
* Training models for HCAD detectors
*
*/
public class EntityColdStarter implements MaintenanceState {
public class EntityColdStarter implements MaintenanceState, CleanState {
private static final Logger logger = LogManager.getLogger(EntityColdStarter.class);
private final Clock clock;
private final ThreadPool threadPool;
Expand Down Expand Up @@ -743,4 +744,9 @@ public void maintenance() {
}
});
}

@Override
public void clear(String detectorId) {
doorKeepers.remove(detectorId);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.opensearch.ad.NodeStateManager;
import org.opensearch.ad.caching.CacheProvider;
import org.opensearch.ad.feature.FeatureManager;
import org.opensearch.ad.ml.EntityColdStarter;
import org.opensearch.ad.ml.ModelManager;
import org.opensearch.ad.task.ADTaskCacheManager;
import org.opensearch.cluster.service.ClusterService;
Expand All @@ -39,6 +40,7 @@ public class DeleteModelTransportAction extends
private FeatureManager featureManager;
private CacheProvider cache;
private ADTaskCacheManager adTaskCacheManager;
private EntityColdStarter coldStarter;

@Inject
public DeleteModelTransportAction(
Expand All @@ -50,7 +52,8 @@ public DeleteModelTransportAction(
ModelManager modelManager,
FeatureManager featureManager,
CacheProvider cache,
ADTaskCacheManager adTaskCacheManager
ADTaskCacheManager adTaskCacheManager,
EntityColdStarter coldStarter
) {
super(
DeleteModelAction.NAME,
Expand All @@ -68,6 +71,7 @@ public DeleteModelTransportAction(
this.featureManager = featureManager;
this.cache = cache;
this.adTaskCacheManager = adTaskCacheManager;
this.coldStarter = coldStarter;
}

@Override
Expand Down Expand Up @@ -121,6 +125,8 @@ protected DeleteModelNodeResponse nodeOperation(DeleteModelNodeRequest request)

cache.get().clear(adID);

coldStarter.clear(adID);

// delete realtime task cache
adTaskCacheManager.removeRealtimeTaskCache(adID);

Expand Down
Loading