Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove rcf jar and fix zip fetching for AD and JS #497

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/CI.yml
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,10 @@ jobs:
# echo "Security plugin is NOT available"
# ./gradlew integTest -Dtests.rest.cluster=localhost:9200 -Dtests.cluster=localhost:9200 -Dtests.clustername="docker-cluster"
# fi
# - name: Run AD Backwards Compatibility Tests
# run: |
# echo "Running backwards compatibility tests ..."
# ./gradlew bwcTestSuite -Dtests.security.manager=false
- name: Run AD Backwards Compatibility Tests
run: |
echo "Running backwards compatibility tests ..."
./gradlew bwcTestSuite -Dtests.security.manager=false
- name: Upload Coverage Report
uses: codecov/codecov-action@v1
with:
Expand Down
43 changes: 30 additions & 13 deletions build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ buildscript {
// 2.0.0-alpha1-SNAPSHOT -> 2.0.0.0-alpha1-SNAPSHOT
version_tokens = opensearch_version.tokenize('-')
opensearch_build = version_tokens[0] + '.0'
job_scheduler_no_snapshot = opensearch_build
plugin_no_snapshot = opensearch_build
if (buildVersionQualifier) {
opensearch_build += "-${buildVersionQualifier}"
job_scheduler_no_snapshot += "-${buildVersionQualifier}"
plugin_no_snapshot += "-${buildVersionQualifier}"
}
if (isSnapshot) {
opensearch_build += "-SNAPSHOT"
Expand All @@ -35,7 +35,9 @@ buildscript {
common_utils_version = System.getProperty("common_utils.version", opensearch_build)
job_scheduler_version = System.getProperty("job_scheduler.version", opensearch_build)
job_scheduler_build_download = 'https://ci.opensearch.org/ci/dbc/distribution-build-opensearch/' + opensearch_no_snapshot +
'/latest/linux/x64/builds/opensearch/plugins/opensearch-job-scheduler-' + job_scheduler_no_snapshot + '.zip'
'/latest/linux/x64/builds/opensearch/plugins/opensearch-job-scheduler-' + plugin_no_snapshot + '.zip'
anomaly_detection_build_download = 'https://ci.opensearch.org/ci/dbc/distribution-build-opensearch/' + opensearch_no_snapshot +
'/latest/linux/x64/builds/opensearch/plugins/opensearch-anomaly-detection-' + plugin_no_snapshot + '.zip'
}

repositories {
Expand Down Expand Up @@ -262,6 +264,9 @@ testClusters.integTest {
return new RegularFile() {
@Override
File getAsFile() {
if (new File("$project.rootDir/$js_resource_folder").exists()) {
project.delete(files("$project.rootDir/$js_resource_folder"))
}
project.mkdir js_resource_folder
ant.get(src: job_scheduler_build_download,
dest: js_resource_folder,
Expand Down Expand Up @@ -325,7 +330,8 @@ task integTestRemote(type: RestIntegTestTask) {
String bwcVersion = "1.13.0.0"
String baseName = "adBwcCluster"
String bwcFilePath = "src/test/resources/org/opensearch/ad/bwc/"

String bwcJobSchedulerPath = bwcFilePath + "job-scheduler/" + opensearch_build
String bwcAnomalyDetectionPath = bwcFilePath + "anomaly-detection/" + opensearch_build
2.times {i ->
testClusters {
"${baseName}$i" {
Expand All @@ -338,10 +344,7 @@ String bwcFilePath = "src/test/resources/org/opensearch/ad/bwc/"
return new RegularFile() {
@Override
File getAsFile() {
ant.get(src: job_scheduler_build_download,
dest: bwcFilePath + "job-scheduler/" + opensearch_version,
httpusecaches: false)
return fileTree(bwcFilePath + "job-scheduler/" + opensearch_version).getSingleFile()
return fileTree(bwcFilePath + "job-scheduler/" + bwcVersion).getSingleFile()
}
}
}
Expand Down Expand Up @@ -370,7 +373,14 @@ List<Provider<RegularFile>> plugins = [
return new RegularFile() {
@Override
File getAsFile() {
return fileTree(bwcFilePath + "job-scheduler/" + project.version).getSingleFile()
if (new File("$project.rootDir/$bwcFilePath/job-scheduler/$opensearch_build").exists()) {
project.delete(files("$project.rootDir/$bwcFilePath/job-scheduler/$opensearch_build"))
}
project.mkdir bwcJobSchedulerPath
ant.get(src: job_scheduler_build_download,
dest: bwcJobSchedulerPath,
httpusecaches: false)
return fileTree(bwcJobSchedulerPath).getSingleFile()
}
}
}
Expand All @@ -381,7 +391,14 @@ List<Provider<RegularFile>> plugins = [
return new RegularFile() {
@Override
File getAsFile() {
return fileTree(bwcFilePath + "anomaly-detection/" + project.version).getSingleFile()
if (new File("$project.rootDir/$bwcFilePath/anomaly-detection/$opensearch_build").exists()) {
project.delete(files("$project.rootDir/$bwcFilePath/anomaly-detection/$opensearch_build"))
}
project.mkdir bwcAnomalyDetectionPath
ant.get(src: anomaly_detection_build_download,
dest: bwcAnomalyDetectionPath,
httpusecaches: false)
return fileTree(bwcAnomalyDetectionPath).getSingleFile()
}
}
}
Expand Down Expand Up @@ -590,6 +607,9 @@ dependencies {
compileOnly "org.opensearch:opensearch-job-scheduler-spi:${job_scheduler_version}"
implementation "org.opensearch:common-utils:${common_utils_version}"
implementation "org.opensearch.client:opensearch-rest-client:${opensearch_version}"
implementation 'software.amazon.randomcutforest:randomcutforest-parkservices:3.0-rc2'
Copy link
Collaborator

@kaituo kaituo Apr 8, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you list the tests you have done to upgrade to rc2? What I can think of:

  • real time single stream and HCAD (including create/start detector, see detector emit results; then stop cluster and restart it to see if results continue to show)
  • historical single stream and HCAD
  • backward compatibility tests: v1 to v3 model, v2 to v3 model: a v1 model in the checkpoint should not be overridden, and we add v2 and v3 models in the checkpoint if necessary. You can start with a v1 cluster, start real time single stream and hcad detectors, let them run and produce results; then upgrade some nodes to a v2 cluster and see if anything happens while v1 and v2 nodes co-exist in the cluster (this simulates our blue/green deployment in service), then remove the v1 nodes and let v2 run a while, then upgrade some nodes to v3 and see if they can co-exist, then remove v2 and let the v3 nodes run.
  • Does memory size formula still hold (Check MemoryTracker)? No need to be exactly the same. Want to verify if the ballpark number is similar.

V1 refers to the checkpoint version we have used until #149. v2 is what we have until your PR. v3 is what your PR tries to bring.

Copy link
Member Author

@amitgalitz amitgalitz Apr 8, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tested single stream real time and historical; I'll do the same for HCAD right now. The BWC tests were the tests added based on Yaliang's code to check deserializing a model from 1.3, plus a conversion test from v1 to v3; I didn't add v2 to v3.

Copy link
Collaborator

@kaituo kaituo Apr 9, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The unit tests for bwc are not enough. They just check that the conversion throws no exception. We will need to do some e2e testing.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I manually tested that after starting both single stream and HCAD detectors (both real time and historical) with the current RCF and getting results, I can switch to RCF 3.0-rc2 and there are no issues; the detectors continue to run the same.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

After letting the detector run for longer, I do get one issue: "Failed to generate checkpoint, java.lang.IllegalStateException: There is discepancy in indices"

implementation 'software.amazon.randomcutforest:randomcutforest-core:3.0-rc2'
implementation 'software.amazon.randomcutforest:randomcutforest-serialization:3.0-rc2'
implementation group: 'com.google.guava', name: 'guava', version:'31.0.1-jre'
implementation group: 'com.google.guava', name: 'failureaccess', version:'1.0.1'
implementation group: 'org.javassist', name: 'javassist', version:'3.28.0-GA'
Expand All @@ -601,12 +621,9 @@ dependencies {
implementation group: 'org.apache.commons', name: 'commons-pool2', version: '2.10.0'

// force Jackson version to avoid version conflict issue
implementation 'software.amazon.randomcutforest:randomcutforest-serialization:2.0.1'
implementation "com.fasterxml.jackson.core:jackson-core:2.13.2"
implementation "com.fasterxml.jackson.core:jackson-databind:2.13.2.2"
implementation "com.fasterxml.jackson.core:jackson-annotations:2.13.2"
implementation files('lib/randomcutforest-parkservices-2.0.1.jar')
implementation files('lib/randomcutforest-core-2.0.1.jar')

// used for serializing/deserializing rcf models.
implementation group: 'io.protostuff', name: 'protostuff-core', version: '1.8.0'
Expand Down
Binary file removed lib/randomcutforest-core-2.0.1.jar
Binary file not shown.
Binary file removed lib/randomcutforest-parkservices-2.0.1.jar
Binary file not shown.
4 changes: 2 additions & 2 deletions src/main/java/org/opensearch/ad/AnomalyDetectorPlugin.java
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@

import com.amazon.randomcutforest.parkservices.state.ThresholdedRandomCutForestMapper;
import com.amazon.randomcutforest.parkservices.state.ThresholdedRandomCutForestState;
import com.amazon.randomcutforest.serialize.json.v1.V1JsonToV2StateConverter;
import com.amazon.randomcutforest.serialize.json.v1.V1JsonToV3StateConverter;
import com.amazon.randomcutforest.state.RandomCutForestMapper;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
Expand Down Expand Up @@ -367,7 +367,7 @@ public Collection<Object> createComponents(
mapper.setSaveExecutorContextEnabled(true);
mapper.setSaveTreeStateEnabled(true);
mapper.setPartialTreeStateEnabled(true);
V1JsonToV2StateConverter converter = new V1JsonToV2StateConverter();
V1JsonToV3StateConverter converter = new V1JsonToV3StateConverter();

double modelMaxSizePercent = AnomalyDetectorSettings.MODEL_MAX_SIZE_PERCENTAGE.get(settings);

Expand Down
8 changes: 4 additions & 4 deletions src/main/java/org/opensearch/ad/ml/CheckpointDao.java
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@
import com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest;
import com.amazon.randomcutforest.parkservices.state.ThresholdedRandomCutForestMapper;
import com.amazon.randomcutforest.parkservices.state.ThresholdedRandomCutForestState;
import com.amazon.randomcutforest.serialize.json.v1.V1JsonToV2StateConverter;
import com.amazon.randomcutforest.serialize.json.v1.V1JsonToV3StateConverter;
import com.amazon.randomcutforest.state.RandomCutForestMapper;
import com.amazon.randomcutforest.state.RandomCutForestState;
import com.google.gson.Gson;
Expand Down Expand Up @@ -117,7 +117,7 @@ public class CheckpointDao {

private Gson gson;
private RandomCutForestMapper mapper;
private V1JsonToV2StateConverter converter;
private V1JsonToV3StateConverter converter;
private ThresholdedRandomCutForestMapper trcfMapper;
private Schema<ThresholdedRandomCutForestState> trcfSchema;

Expand Down Expand Up @@ -157,7 +157,7 @@ public CheckpointDao(
String indexName,
Gson gson,
RandomCutForestMapper mapper,
V1JsonToV2StateConverter converter,
V1JsonToV3StateConverter converter,
ThresholdedRandomCutForestMapper trcfMapper,
Schema<ThresholdedRandomCutForestState> trcfSchema,
Class<? extends ThresholdingModel> thresholdingModelClass,
Expand Down Expand Up @@ -556,7 +556,7 @@ public Optional<Entry<EntityModel, Instant>> fromEntityModelCheckpoint(Map<Strin
}
}

private ThresholdedRandomCutForest toTrcf(String checkpoint) {
ThresholdedRandomCutForest toTrcf(String checkpoint) {
ThresholdedRandomCutForest trcf = null;
if (checkpoint != null && !checkpoint.isEmpty()) {
try {
Expand Down
19 changes: 16 additions & 3 deletions src/test/java/org/opensearch/ad/ml/CheckpointDaoTests.java
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,16 @@
import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.security.AccessController;
import java.security.PrivilegedAction;
import java.time.Clock;
import java.time.Instant;
import java.time.Month;
import java.time.OffsetDateTime;
import java.time.ZoneOffset;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
Expand Down Expand Up @@ -109,7 +112,7 @@
import com.amazon.randomcutforest.parkservices.ThresholdedRandomCutForest;
import com.amazon.randomcutforest.parkservices.state.ThresholdedRandomCutForestMapper;
import com.amazon.randomcutforest.parkservices.state.ThresholdedRandomCutForestState;
import com.amazon.randomcutforest.serialize.json.v1.V1JsonToV2StateConverter;
import com.amazon.randomcutforest.serialize.json.v1.V1JsonToV3StateConverter;
import com.amazon.randomcutforest.state.RandomCutForestMapper;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
Expand Down Expand Up @@ -154,7 +157,7 @@ public class CheckpointDaoTests extends OpenSearchTestCase {
private GenericObjectPool<LinkedBuffer> serializeRCFBufferPool;
private RandomCutForestMapper mapper;
private ThresholdedRandomCutForestMapper trcfMapper;
private V1JsonToV2StateConverter converter;
private V1JsonToV3StateConverter converter;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is also a V2StateToV3ForestConverter. We need to convert from v1 to v3, v2 to v3. If v2 and v3 are incompatible, we need to add a field modelV3 in checkpoint.

double anomalyRate;

@Before
Expand All @@ -180,7 +183,7 @@ public void setup() {
.getSchema(ThresholdedRandomCutForestState.class)
);

converter = new V1JsonToV2StateConverter();
converter = new V1JsonToV3StateConverter();

serializeRCFBufferPool = spy(AccessController.doPrivileged(new PrivilegedAction<GenericObjectPool<LinkedBuffer>>() {
@Override
Expand Down Expand Up @@ -1000,4 +1003,14 @@ private double[] getPoint(int dimensions, Random random) {
}
return point;
}
// .parseReader(new FileReader(new File(getClass().getResource(labalFileName).toURI()), Charset.defaultCharset()))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unused code

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

will remove


public void testDeserializeRCFModel() throws Exception {
// Model in file 1_3_0_rcf_model.json not passed initialization yet
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So does that mean the model is empty? If yes, have you also tested with a fully initialized model?

String filePath = getClass().getResource("1_3_0_rcf_model.json").getPath();
String json = Files.readString(Paths.get(filePath), Charset.defaultCharset());
Map map = gson.fromJson(json, Map.class);
String model = (String) ((Map) ((Map) ((ArrayList) ((Map) map.get("hits")).get("hits")).get(0)).get("_source")).get("modelV2");
ThresholdedRandomCutForest forest = checkpointDao.toTrcf(model);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add checks on the specific fields of trcf. Please check my other tests.

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@

import com.amazon.randomcutforest.parkservices.state.ThresholdedRandomCutForestMapper;
import com.amazon.randomcutforest.parkservices.state.ThresholdedRandomCutForestState;
import com.amazon.randomcutforest.serialize.json.v1.V1JsonToV2StateConverter;
import com.amazon.randomcutforest.serialize.json.v1.V1JsonToV3StateConverter;
import com.amazon.randomcutforest.state.RandomCutForestMapper;
import com.google.gson.Gson;

Expand Down Expand Up @@ -92,7 +92,7 @@ public void setUp() throws Exception {
maxCheckpointBytes = 1_000_000;

RandomCutForestMapper mapper = mock(RandomCutForestMapper.class);
V1JsonToV2StateConverter converter = mock(V1JsonToV2StateConverter.class);
V1JsonToV3StateConverter converter = mock(V1JsonToV3StateConverter.class);

objectPool = mock(GenericObjectPool.class);
int deserializeRCFBufferSize = 512;
Expand Down
49 changes: 49 additions & 0 deletions src/test/resources/org/opensearch/ad/ml/1_3_0_rcf_model.json

Large diffs are not rendered by default.