Commit 781551c: Modifying Neural Sparse Tests
Signed-off-by: Varun Jain <[email protected]>
vibrantvarun committed Jan 10, 2024
1 parent 26485d7 commit 781551c
Showing 8 changed files with 131 additions and 57 deletions.
2 changes: 2 additions & 0 deletions qa/restart-upgrade/build.gradle
@@ -39,6 +39,7 @@ task testAgainstOldCluster(type: StandaloneRestIntegTestTask) {
    filter {
        excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*"
        excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*"
+       excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseSearchIT.*"
    }
}

@@ -64,6 +65,7 @@ task testAgainstNewCluster(type: StandaloneRestIntegTestTask) {
    filter {
        excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*"
        excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*"
+       excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseSearchIT.*"
    }
}

AbstractRestartUpgradeRestTestCase.java (restart-upgrade)
@@ -8,14 +8,14 @@
import java.util.Optional;
import org.junit.Before;
import org.opensearch.common.settings.Settings;
-import org.opensearch.neuralsearch.BaseNeuralSearchIT;
+import org.opensearch.neuralsearch.BaseSparseEncodingIT;
import static org.opensearch.neuralsearch.TestUtils.CLIENT_TIMEOUT_VALUE;
import static org.opensearch.neuralsearch.TestUtils.RESTART_UPGRADE_OLD_CLUSTER;
import static org.opensearch.neuralsearch.TestUtils.BWC_VERSION;
import static org.opensearch.neuralsearch.TestUtils.NEURAL_SEARCH_BWC_PREFIX;
import org.opensearch.test.rest.OpenSearchRestTestCase;

-public abstract class AbstractRestartUpgradeRestTestCase extends BaseNeuralSearchIT {
+public abstract class AbstractRestartUpgradeRestTestCase extends BaseSparseEncodingIT {

    @Before
    protected String getIndexNameForTest() {
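Note: the base-class swap from BaseNeuralSearchIT to BaseSparseEncodingIT is what gives the BWC suites access to the sparse-encoding helpers used further down (addSparseEncodingDoc, computeExpectedScore). The getIndexNameForTest() body is collapsed above; as a rough, hedged sketch, BWC index names are presumably derived from the imported NEURAL_SEARCH_BWC_PREFIX plus the running test's name (illustrative only, not the actual implementation):

    // Illustrative sketch only; the real method body is collapsed in this diff view.
    // Assumes OpenSearchTestCase#getTestName() and java.util.Locale are available.
    protected String getIndexNameForTest() {
        // e.g. neural-search BWC prefix + lowercased test method name
        return NEURAL_SEARCH_BWC_PREFIX + getTestName().toLowerCase(Locale.ROOT);
    }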
HybridSearchIT.java (restart-upgrade)

@@ -40,38 +40,36 @@ public class HybridSearchIT extends AbstractRestartUpgradeRestTestCase {
    public void testNormalizationProcessor_whenIndexWithMultipleShards_E2EFlow() throws Exception {
        waitForClusterHealthGreen(NODES_BWC_CLUSTER);
        if (isRunningAgainstOldCluster()) {
-           String index = getIndexNameForTest();
            String modelId = uploadTextEmbeddingModel();
            loadModel(modelId);
            createPipelineProcessor(modelId, PIPELINE_NAME);
            createIndexWithConfiguration(
-               index,
+               getIndexNameForTest(),
                Files.readString(Path.of(classLoader.getResource("processor/IndexMappings.json").toURI())),
                PIPELINE_NAME
            );
-           addDocument(index, "0", TEST_FIELD, TEXT_1, null, null);
-           addDocument(index, "1", TEST_FIELD, TEXT_2, null, null);
-           addDocument(index, "2", TEST_FIELD, TEXT_3, null, null);
-           addDocument(index, "3", TEST_FIELD, TEXT_4, null, null);
-           addDocument(index, "4", TEST_FIELD, TEXT_5, null, null);
+           addDocument(getIndexNameForTest(), "0", TEST_FIELD, TEXT_1, null, null);
+           addDocument(getIndexNameForTest(), "1", TEST_FIELD, TEXT_2, null, null);
+           addDocument(getIndexNameForTest(), "2", TEST_FIELD, TEXT_3, null, null);
+           addDocument(getIndexNameForTest(), "3", TEST_FIELD, TEXT_4, null, null);
+           addDocument(getIndexNameForTest(), "4", TEST_FIELD, TEXT_5, null, null);
            createSearchPipeline(
                SEARCH_PIPELINE_NAME,
                DEFAULT_NORMALIZATION_METHOD,
                DEFAULT_COMBINATION_METHOD,
                Map.of(PARAM_NAME_WEIGHTS, Arrays.toString(new float[] { 0.3f, 0.7f }))
            );
        } else {
-           String index = getIndexNameForTest();
            Map<String, Object> pipeline = getIngestionPipeline(PIPELINE_NAME);
            assertNotNull(pipeline);
            String modelId = getModelId(pipeline, TEXT_EMBEDDING_PROCESSOR);
            loadModel(modelId);
-           addDocument(index, "5", TEST_FIELD, TEXT_6, null, null);
-           validateTestIndex(modelId, index, SEARCH_PIPELINE_NAME);
+           addDocument(getIndexNameForTest(), "5", TEST_FIELD, TEXT_6, null, null);
+           validateTestIndex(modelId, getIndexNameForTest(), SEARCH_PIPELINE_NAME);
            deleteSearchPipeline(SEARCH_PIPELINE_NAME);
            deletePipeline(PIPELINE_NAME);
            deleteModel(modelId);
-           deleteIndex(index);
+           deleteIndex(getIndexNameForTest());
        }
    }

@@ -81,38 +79,36 @@ public void testNormalizationProcessor_whenIndexWithMultipleShards_E2EFlow() throws Exception {
    public void testNormalizationProcessor_whenIndexWithSingleShard_E2EFlow() throws Exception {
        waitForClusterHealthGreen(NODES_BWC_CLUSTER);
        if (isRunningAgainstOldCluster()) {
-           String index = getIndexNameForTest() + "1";
            String modelId = uploadTextEmbeddingModel();
            loadModel(modelId);
            createPipelineProcessor(modelId, PIPELINE1_NAME);
            createIndexWithConfiguration(
-               index,
+               getIndexNameForTest(),
                Files.readString(Path.of(classLoader.getResource("processor/Index1Mappings.json").toURI())),
                PIPELINE1_NAME
            );
-           addDocument(index, "0", TEST_FIELD, TEXT_1, null, null);
-           addDocument(index, "1", TEST_FIELD, TEXT_2, null, null);
-           addDocument(index, "2", TEST_FIELD, TEXT_3, null, null);
-           addDocument(index, "3", TEST_FIELD, TEXT_4, null, null);
-           addDocument(index, "4", TEST_FIELD, TEXT_5, null, null);
+           addDocument(getIndexNameForTest(), "0", TEST_FIELD, TEXT_1, null, null);
+           addDocument(getIndexNameForTest(), "1", TEST_FIELD, TEXT_2, null, null);
+           addDocument(getIndexNameForTest(), "2", TEST_FIELD, TEXT_3, null, null);
+           addDocument(getIndexNameForTest(), "3", TEST_FIELD, TEXT_4, null, null);
+           addDocument(getIndexNameForTest(), "4", TEST_FIELD, TEXT_5, null, null);
            createSearchPipeline(
                SEARCH_PIPELINE1_NAME,
                DEFAULT_NORMALIZATION_METHOD,
                DEFAULT_COMBINATION_METHOD,
                Map.of(PARAM_NAME_WEIGHTS, Arrays.toString(new float[] { 0.3f, 0.7f }))
            );
        } else {
-           String index = getIndexNameForTest() + "1";
            Map<String, Object> pipeline = getIngestionPipeline(PIPELINE1_NAME);
            assertNotNull(pipeline);
            String modelId = getModelId(pipeline, TEXT_EMBEDDING_PROCESSOR);
            loadModel(modelId);
-           addDocument(index, "5", TEST_FIELD, TEXT_6, null, null);
-           validateTestIndex(modelId, index, SEARCH_PIPELINE1_NAME);
+           addDocument(getIndexNameForTest(), "5", TEST_FIELD, TEXT_6, null, null);
+           validateTestIndex(modelId, getIndexNameForTest(), SEARCH_PIPELINE1_NAME);
            deleteSearchPipeline(SEARCH_PIPELINE1_NAME);
            deletePipeline(PIPELINE1_NAME);
            deleteModel(modelId);
-           deleteIndex(index);
+           deleteIndex(getIndexNameForTest());
        }
    }
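Note: both tests register a search pipeline with DEFAULT_NORMALIZATION_METHOD, DEFAULT_COMBINATION_METHOD, and subquery weights of 0.3 and 0.7. Assuming the default combination technique is a weighted arithmetic mean, the final document score is the weighted average of the normalized subquery scores; a hedged sketch of that arithmetic (illustrative only, the actual combination runs server-side):

    // Hedged sketch of weighted arithmetic-mean score combination; the weights
    // mirror the test's { 0.3f, 0.7f } pair, which sums to 1.
    static float combineScores(float[] normalizedScores, float[] weights) {
        float combined = 0.0f;
        for (int i = 0; i < normalizedScores.length; i++) {
            combined += weights[i] * normalizedScores[i];  // per-subquery contribution
        }
        return combined;
    }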

NeuralSparseSearchIT.java (restart-upgrade)
@@ -7,18 +7,26 @@
import com.carrotsearch.randomizedtesting.RandomizedTest;
import java.nio.file.Files;
import java.nio.file.Path;
+import java.util.List;
import java.util.Map;
+import org.opensearch.index.query.BoolQueryBuilder;
+import org.opensearch.index.query.MatchQueryBuilder;
import org.opensearch.neuralsearch.TestUtils;
import static org.opensearch.neuralsearch.TestUtils.NODES_BWC_CLUSTER;
import static org.opensearch.neuralsearch.TestUtils.SPARSE_ENCODING_PROCESSOR;
+import static org.opensearch.neuralsearch.TestUtils.objectToFloat;
import org.opensearch.neuralsearch.query.NeuralSparseQueryBuilder;

public class NeuralSparseSearchIT extends AbstractRestartUpgradeRestTestCase {
    private static final String PIPELINE_NAME = "nlp-ingest-pipeline-sparse";
-   private static final String TEST_FIELD = "passage_text";
-   private static final String TEXT_1 = "Hello world";
-   private static final String TEXT_2 = "Hi planet";
-   private static final String QUERY = "Hi world";
+   private static final String TEST_SPARSE_ENCODING_FIELD = "passage_embedding";
+   private static final String TEST_TEXT_FIELD = "passage_text";
+   private static final String TEXT_1 = "Hello world a b";
+   private static final String TEXT_2 = "Hello planet";
+   private static final List<String> TEST_TOKENS_1 = List.of("hello", "world", "a", "b", "c");
+   private static final List<String> TEST_TOKENS_2 = List.of("hello", "planet", "a", "b", "c");
+   private final Map<String, Float> testRankFeaturesDoc1 = TestUtils.createRandomTokenWeightMap(TEST_TOKENS_1);
+   private final Map<String, Float> testRankFeaturesDoc2 = TestUtils.createRandomTokenWeightMap(TEST_TOKENS_2);

    // Test restart-upgrade sparse embedding processor
    // Create Sparse Encoding Processor, Ingestion Pipeline and add document
@@ -34,13 +42,28 @@ public void testSparseEncodingProcessor_E2EFlow() throws Exception {
                Files.readString(Path.of(classLoader.getResource("processor/SparseIndexMappings.json").toURI())),
                PIPELINE_NAME
            );
-           addDocument(getIndexNameForTest(), "0", TEST_FIELD, TEXT_1, null, null);
+
+           addSparseEncodingDoc(
+               getIndexNameForTest(),
+               "0",
+               List.of(TEST_SPARSE_ENCODING_FIELD),
+               List.of(testRankFeaturesDoc1),
+               List.of(TEST_TEXT_FIELD),
+               List.of(TEXT_1)
+           );
        } else {
            Map<String, Object> pipeline = getIngestionPipeline(PIPELINE_NAME);
            assertNotNull(pipeline);
            String modelId = TestUtils.getModelId(pipeline, SPARSE_ENCODING_PROCESSOR);
            loadModel(modelId);
-           addDocument(getIndexNameForTest(), "1", TEST_FIELD, TEXT_2, null, null);
+           addSparseEncodingDoc(
+               getIndexNameForTest(),
+               "1",
+               List.of(TEST_SPARSE_ENCODING_FIELD),
+               List.of(testRankFeaturesDoc2),
+               List.of(TEST_TEXT_FIELD),
+               List.of(TEXT_2)
+           );
            validateTestIndex(modelId);
            deletePipeline(PIPELINE_NAME);
            deleteModel(modelId);
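Note: the new fixtures index random token weights rather than model-generated ones, which keeps the upgrade assertion independent of a particular model version. TestUtils.createRandomTokenWeightMap is not part of this diff; a minimal sketch of what such a helper could look like (assumed behavior, not the actual implementation):

    // Hedged sketch: map each token to a random positive weight, since the
    // rank_features field type rejects zero and negative values.
    // Requires java.util.HashMap; RandomizedTest is already imported above.
    public static Map<String, Float> createRandomTokenWeightMap(List<String> tokens) {
        Map<String, Float> resultMap = new HashMap<>();
        for (String token : tokens) {
            // RandomizedTest.randomFloat() yields a float in [0, 1); nudge it above zero
            resultMap.put(token, RandomizedTest.randomFloat() + Float.MIN_NORMAL);
        }
        return resultMap;
    }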
@@ -52,14 +75,18 @@
    private void validateTestIndex(String modelId) throws Exception {
        int docCount = getDocCount(getIndexNameForTest());
        assertEquals(2, docCount);
-       NeuralSparseQueryBuilder neuralSparseQueryBuilder = new NeuralSparseQueryBuilder();
-       neuralSparseQueryBuilder.fieldName("passage_embedding");
-       neuralSparseQueryBuilder.queryText(QUERY);
-       neuralSparseQueryBuilder.modelId(modelId);
-       Map<String, Object> response = search(getIndexNameForTest(), neuralSparseQueryBuilder, 1);
-       assertNotNull(response);
-       int hits = getHitCount(response);
-       assertEquals(2, hits);
+       BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder();
+       NeuralSparseQueryBuilder sparseEncodingQueryBuilder = new NeuralSparseQueryBuilder().fieldName(TEST_SPARSE_ENCODING_FIELD)
+           .queryText(TEXT_1)
+           .modelId(modelId);
+       MatchQueryBuilder matchQueryBuilder = new MatchQueryBuilder(TEST_TEXT_FIELD, TEXT_1);
+       boolQueryBuilder.should(sparseEncodingQueryBuilder).should(matchQueryBuilder);
+       Map<String, Object> response = search(getIndexNameForTest(), boolQueryBuilder, 1);
+       Map<String, Object> firstInnerHit = getFirstInnerHit(response);
+
+       assertEquals("1", firstInnerHit.get("_id"));
+       float minExpectedScore = computeExpectedScore(modelId, testRankFeaturesDoc1, TEXT_1);
+       assertTrue(minExpectedScore < objectToFloat(firstInnerHit.get("_score")));
    }

    private String uploadTextEmbeddingModel() throws Exception {
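Note: the rewritten validateTestIndex checks more than a hit count: the top hit must clear a model-derived score floor. computeExpectedScore comes from the sparse-encoding base class and is not shown in this diff; conceptually, a neural_sparse score behaves like a dot product between the query's inferred token weights and the document's stored rank_features weights. A hedged sketch under that assumption (runSparseModelInference is an assumed helper name):

    // Hedged sketch of the score floor: run the sparse model on the query text,
    // then dot the inferred token weights against the document's token weights.
    private float computeExpectedScore(String modelId, Map<String, Float> docTokenWeights, String queryText) throws Exception {
        Map<String, Float> queryTokenWeights = runSparseModelInference(modelId, queryText);  // assumed helper
        float score = 0.0f;
        for (Map.Entry<String, Float> entry : queryTokenWeights.entrySet()) {
            Float docWeight = docTokenWeights.get(entry.getKey());
            if (docWeight != null) {
                score += entry.getValue() * docWeight;  // only overlapping tokens contribute
            }
        }
        return score;
    }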
4 changes: 4 additions & 0 deletions qa/rolling-upgrade/build.gradle
@@ -39,6 +39,7 @@ task testAgainstOldCluster(type: StandaloneRestIntegTestTask) {
    filter {
        excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*"
        excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*"
+       excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseSearchIT.*"
    }
}

@@ -65,6 +66,7 @@ task testAgainstOneThirdUpgradedCluster(type: StandaloneRestIntegTestTask) {
    filter {
        excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*"
        excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*"
+       excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseSearchIT.*"
    }
}

@@ -90,6 +92,7 @@ task testAgainstTwoThirdsUpgradedCluster(type: StandaloneRestIntegTestTask) {
    filter {
        excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*"
        excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*"
+       excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseSearchIT.*"
    }
}

@@ -115,6 +118,7 @@ task testRollingUpgrade(type: StandaloneRestIntegTestTask) {
    filter {
        excludeTestsMatching "org.opensearch.neuralsearch.bwc.MultiModalSearchIT.*"
        excludeTestsMatching "org.opensearch.neuralsearch.bwc.HybridSearchIT.*"
+       excludeTestsMatching "org.opensearch.neuralsearch.bwc.NeuralSparseSearchIT.*"
    }
}

AbstractRollingUpgradeTestCase.java (rolling-upgrade)
@@ -8,7 +8,7 @@
import java.util.Optional;
import org.junit.Before;
import org.opensearch.common.settings.Settings;
-import org.opensearch.neuralsearch.BaseNeuralSearchIT;
+import org.opensearch.neuralsearch.BaseSparseEncodingIT;
import org.opensearch.test.rest.OpenSearchRestTestCase;
import static org.opensearch.neuralsearch.TestUtils.OLD_CLUSTER;
import static org.opensearch.neuralsearch.TestUtils.MIXED_CLUSTER;
@@ -18,7 +18,7 @@
import static org.opensearch.neuralsearch.TestUtils.BWCSUITE_CLUSTER;
import static org.opensearch.neuralsearch.TestUtils.NEURAL_SEARCH_BWC_PREFIX;

-public abstract class AbstractRollingUpgradeTestCase extends BaseNeuralSearchIT {
+public abstract class AbstractRollingUpgradeTestCase extends BaseSparseEncodingIT {

    @Before
    protected String getIndexNameForTest() {
HybridSearchIT.java (rolling-upgrade)
@@ -8,6 +8,7 @@
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
+import java.util.List;
import java.util.Map;
import org.opensearch.index.query.MatchQueryBuilder;
import org.opensearch.neuralsearch.TestUtils;
@@ -30,10 +31,10 @@ public class HybridSearchIT extends AbstractRollingUpgradeTestCase {
    private static final String QUERY = "Hi world";
    private static final int NUM_DOCS_PER_ROUND = 1;

-   // Test rolling-upgrade Hybrid Search
+   // Test rolling-upgrade normalization processor when index with multiple shards
    // Create Text Embedding Processor, Ingestion Pipeline, add document and search pipeline with normalization processor
    // Validate process, pipeline and document count in rolling-upgrade scenario
-   public void testNormalizationProcessor_E2EFlow() throws Exception {
+   public void testNormalizationProcessor_whenIndexWithMultipleShards_E2EFlow() throws Exception {
        waitForClusterHealthGreen(NODES_BWC_CLUSTER);
        switch (getClusterType()) {
            case OLD:
@@ -92,6 +93,12 @@ private void validateTestIndexOnUpgrade(int numberOfDocs, String modelId) throws
            Map.of("search_pipeline", SEARCH_PIPELINE_NAME)
        );
        assertNotNull(searchResponseAsMap);
+       int hits = getHitCount(searchResponseAsMap);
+       assertEquals(1, hits);
+       List<Double> scoresList = getNormalizationScoreList(searchResponseAsMap);
+       for (Double score : scoresList) {
+           assertTrue(0 < score && score < 1);
+       }
    }

    private String uploadTextEmbeddingModel() throws Exception {
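Note: the added assertions expect exactly one hit and every normalized score strictly between 0 and 1. That range follows from the search pipeline's normalization step: with a min-max technique, each subquery's raw scores are rescaled against that subquery's minimum and maximum before combination. A hedged sketch of the arithmetic (illustrative only; the actual logic runs server-side in the plugin's normalization processor):

    // Hedged sketch of min-max normalization: rescale a raw score into [0, 1]
    // using the minimum and maximum scores a subquery produced for the request.
    static float minMaxNormalize(float score, float minScore, float maxScore) {
        if (maxScore == minScore) {
            return 1.0f;  // placeholder for the degenerate single-score case
        }
        return (score - minScore) / (maxScore - minScore);
    }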
(1 of the 8 changed files is not rendered in this view.)