From ddb5d69d16b88da62f864146d9d29252c2d3773e Mon Sep 17 00:00:00 2001 From: zane-neo Date: Thu, 28 Sep 2023 14:12:59 +0800 Subject: [PATCH] Address PR comments and change processor and query clause name. (#9) * Address code review comments Signed-off-by: zane-neo * Change lower case neural_sparse to upper case Signed-off-by: zane-neo * Change back processor type name to sparse_encoding Signed-off-by: zane-neo * Change names Signed-off-by: zane-neo * Format code Signed-off-by: zane-neo --------- Signed-off-by: zane-neo --- CHANGELOG.md | 2 +- .../lucene}/BoundedLinearFeatureQuery.java | 7 +- .../neuralsearch/plugin/NeuralSearch.java | 6 +- ...Processor.java => InferenceProcessor.java} | 10 +- .../processor/SparseEncodingProcessor.java | 2 +- .../processor/TextEmbeddingProcessor.java | 2 +- ...der.java => NeuralSparseQueryBuilder.java} | 27 ++--- .../neuralsearch/util/TokenWeightUtil.java | 2 +- ...ava => NeuralSparseQueryBuilderTests.java} | 61 +++++----- ...gQueryIT.java => NeuralSparseQueryIT.java} | 106 +++++++++--------- .../SparseEncodingIndexMappings.json | 2 +- .../SparseEncodingPipelineConfiguration.json | 2 +- .../UploadSparseEncodingModelRequestBody.json | 2 +- 13 files changed, 120 insertions(+), 111 deletions(-) rename src/main/java/org/{opensearch/neuralsearch/query => apache/lucene}/BoundedLinearFeatureQuery.java (94%) rename src/main/java/org/opensearch/neuralsearch/processor/{NLPProcessor.java => InferenceProcessor.java} (97%) rename src/main/java/org/opensearch/neuralsearch/query/{SparseEncodingQueryBuilder.java => NeuralSparseQueryBuilder.java} (91%) rename src/test/java/org/opensearch/neuralsearch/query/{SparseEncodingQueryBuilderTests.java => NeuralSparseQueryBuilderTests.java} (83%) rename src/test/java/org/opensearch/neuralsearch/query/{SparseEncodingQueryIT.java => NeuralSparseQueryIT.java} (70%) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6182b359e..15532572f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,7 +16,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), ### Features Support sparse semantic retrieval by introducing `sparse_encoding` ingest processor and query builder ([#333](https://github.com/opensearch-project/neural-search/pull/333)) ### Enhancements -Add `max_token_score` parameter to improve the execution efficiency for `sparse_encoding` query clause ([#348](https://github.com/opensearch-project/neural-search/pull/348)) +Add `max_token_score` parameter to improve the execution efficiency for `neural_sparse` query clause ([#348](https://github.com/opensearch-project/neural-search/pull/348)) ### Bug Fixes ### Infrastructure ### Documentation diff --git a/src/main/java/org/opensearch/neuralsearch/query/BoundedLinearFeatureQuery.java b/src/main/java/org/apache/lucene/BoundedLinearFeatureQuery.java similarity index 94% rename from src/main/java/org/opensearch/neuralsearch/query/BoundedLinearFeatureQuery.java rename to src/main/java/org/apache/lucene/BoundedLinearFeatureQuery.java index eb51ad146..617662363 100644 --- a/src/main/java/org/opensearch/neuralsearch/query/BoundedLinearFeatureQuery.java +++ b/src/main/java/org/apache/lucene/BoundedLinearFeatureQuery.java @@ -33,7 +33,7 @@ * build the query and add an upperbound to it. */ -package org.opensearch.neuralsearch.query; +package org.apache.lucene; import java.io.IOException; import java.util.Objects; @@ -66,6 +66,10 @@ * To mitigate this issue, we rewrite the FeatureQuery to BoundedLinearFeatureQuery. The caller can * set the token score upperbound of this query. And according to our use case, we use LinearFunction * as the score function. + * + * This class combines both FeatureQuery + * and FeatureField together + * and will be deprecated after OpenSearch upgraded lucene to version 9.8. */ public final class BoundedLinearFeatureQuery extends Query { @@ -219,6 +223,7 @@ public String toString(String field) { // the field and decodeFeatureValue are modified from FeatureField.decodeFeatureValue static final int MAX_FREQ = Float.floatToIntBits(Float.MAX_VALUE) >>> 15; + // Rewriting this function to make scoreUpperBound work. private float decodeFeatureValue(float freq) { if (freq > MAX_FREQ) { return scoreUpperBound; diff --git a/src/main/java/org/opensearch/neuralsearch/plugin/NeuralSearch.java b/src/main/java/org/opensearch/neuralsearch/plugin/NeuralSearch.java index 2ac8853e4..3ce9582f4 100644 --- a/src/main/java/org/opensearch/neuralsearch/plugin/NeuralSearch.java +++ b/src/main/java/org/opensearch/neuralsearch/plugin/NeuralSearch.java @@ -41,7 +41,7 @@ import org.opensearch.neuralsearch.processor.normalization.ScoreNormalizer; import org.opensearch.neuralsearch.query.HybridQueryBuilder; import org.opensearch.neuralsearch.query.NeuralQueryBuilder; -import org.opensearch.neuralsearch.query.SparseEncodingQueryBuilder; +import org.opensearch.neuralsearch.query.NeuralSparseQueryBuilder; import org.opensearch.neuralsearch.search.query.HybridQueryPhaseSearcher; import org.opensearch.plugins.ActionPlugin; import org.opensearch.plugins.ExtensiblePlugin; @@ -81,7 +81,7 @@ public Collection createComponents( final Supplier repositoriesServiceSupplier ) { NeuralQueryBuilder.initialize(clientAccessor); - SparseEncodingQueryBuilder.initialize(clientAccessor); + NeuralSparseQueryBuilder.initialize(clientAccessor); normalizationProcessorWorkflow = new NormalizationProcessorWorkflow(new ScoreNormalizer(), new ScoreCombiner()); return List.of(clientAccessor); } @@ -91,7 +91,7 @@ public List> getQueries() { return Arrays.asList( new QuerySpec<>(NeuralQueryBuilder.NAME, NeuralQueryBuilder::new, NeuralQueryBuilder::fromXContent), new QuerySpec<>(HybridQueryBuilder.NAME, HybridQueryBuilder::new, HybridQueryBuilder::fromXContent), - new QuerySpec<>(SparseEncodingQueryBuilder.NAME, SparseEncodingQueryBuilder::new, SparseEncodingQueryBuilder::fromXContent) + new QuerySpec<>(NeuralSparseQueryBuilder.NAME, NeuralSparseQueryBuilder::new, NeuralSparseQueryBuilder::fromXContent) ); } diff --git a/src/main/java/org/opensearch/neuralsearch/processor/NLPProcessor.java b/src/main/java/org/opensearch/neuralsearch/processor/InferenceProcessor.java similarity index 97% rename from src/main/java/org/opensearch/neuralsearch/processor/NLPProcessor.java rename to src/main/java/org/opensearch/neuralsearch/processor/InferenceProcessor.java index 4ac63d419..acf0eb32b 100644 --- a/src/main/java/org/opensearch/neuralsearch/processor/NLPProcessor.java +++ b/src/main/java/org/opensearch/neuralsearch/processor/InferenceProcessor.java @@ -32,7 +32,7 @@ * and set the target fields according to the field name map. */ @Log4j2 -public abstract class NLPProcessor extends AbstractProcessor { +public abstract class InferenceProcessor extends AbstractProcessor { public static final String MODEL_ID_FIELD = "model_id"; public static final String FIELD_MAP_FIELD = "field_map"; @@ -51,7 +51,7 @@ public abstract class NLPProcessor extends AbstractProcessor { private final Environment environment; - public NLPProcessor( + public InferenceProcessor( String tag, String description, String type, @@ -249,7 +249,7 @@ protected void setVectorFieldsToDocument(IngestDocument ingestDocument, Map buildNLPResult(Map processorMap, List results, Map sourceAndMetadataMap) { - NLPProcessor.IndexWrapper indexWrapper = new NLPProcessor.IndexWrapper(0); + InferenceProcessor.IndexWrapper indexWrapper = new InferenceProcessor.IndexWrapper(0); Map result = new LinkedHashMap<>(); for (Map.Entry knnMapEntry : processorMap.entrySet()) { String knnKey = knnMapEntry.getKey(); @@ -270,7 +270,7 @@ private void putNLPResultToSourceMapForMapType( String processorKey, Object sourceValue, List results, - NLPProcessor.IndexWrapper indexWrapper, + InferenceProcessor.IndexWrapper indexWrapper, Map sourceAndMetadataMap ) { if (processorKey == null || sourceAndMetadataMap == null || sourceValue == null) return; @@ -294,7 +294,7 @@ private void putNLPResultToSourceMapForMapType( private List> buildNLPResultForListType( List sourceValue, List results, - NLPProcessor.IndexWrapper indexWrapper + InferenceProcessor.IndexWrapper indexWrapper ) { List> keyToResult = new ArrayList<>(); IntStream.range(0, sourceValue.size()) diff --git a/src/main/java/org/opensearch/neuralsearch/processor/SparseEncodingProcessor.java b/src/main/java/org/opensearch/neuralsearch/processor/SparseEncodingProcessor.java index 275117809..b5bb85aac 100644 --- a/src/main/java/org/opensearch/neuralsearch/processor/SparseEncodingProcessor.java +++ b/src/main/java/org/opensearch/neuralsearch/processor/SparseEncodingProcessor.java @@ -22,7 +22,7 @@ * and field_map can be used to indicate which fields needs text embedding and the corresponding keys for the sparse encoding results. */ @Log4j2 -public final class SparseEncodingProcessor extends NLPProcessor { +public final class SparseEncodingProcessor extends InferenceProcessor { public static final String TYPE = "sparse_encoding"; public static final String LIST_TYPE_NESTED_MAP_KEY = "sparse_encoding"; diff --git a/src/main/java/org/opensearch/neuralsearch/processor/TextEmbeddingProcessor.java b/src/main/java/org/opensearch/neuralsearch/processor/TextEmbeddingProcessor.java index 1df60baea..c30d14caf 100644 --- a/src/main/java/org/opensearch/neuralsearch/processor/TextEmbeddingProcessor.java +++ b/src/main/java/org/opensearch/neuralsearch/processor/TextEmbeddingProcessor.java @@ -21,7 +21,7 @@ * and field_map can be used to indicate which fields needs text embedding and the corresponding keys for the text embedding results. */ @Log4j2 -public final class TextEmbeddingProcessor extends NLPProcessor { +public final class TextEmbeddingProcessor extends InferenceProcessor { public static final String TYPE = "text_embedding"; public static final String LIST_TYPE_NESTED_MAP_KEY = "knn"; diff --git a/src/main/java/org/opensearch/neuralsearch/query/SparseEncodingQueryBuilder.java b/src/main/java/org/opensearch/neuralsearch/query/NeuralSparseQueryBuilder.java similarity index 91% rename from src/main/java/org/opensearch/neuralsearch/query/SparseEncodingQueryBuilder.java rename to src/main/java/org/opensearch/neuralsearch/query/NeuralSparseQueryBuilder.java index b36777843..fd15b431b 100644 --- a/src/main/java/org/opensearch/neuralsearch/query/SparseEncodingQueryBuilder.java +++ b/src/main/java/org/opensearch/neuralsearch/query/NeuralSparseQueryBuilder.java @@ -21,6 +21,7 @@ import org.apache.commons.lang.StringUtils; import org.apache.commons.lang.builder.EqualsBuilder; import org.apache.commons.lang.builder.HashCodeBuilder; +import org.apache.lucene.BoundedLinearFeatureQuery; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BoostQuery; @@ -44,7 +45,7 @@ import com.google.common.annotations.VisibleForTesting; /** - * SparseEncodingQueryBuilder is responsible for handling "sparse_encoding" query types. It uses an ML SPARSE_ENCODING model + * SparseEncodingQueryBuilder is responsible for handling "neural_sparse" query types. It uses an ML NEURAL_SPARSE model * or SPARSE_TOKENIZE model to produce a Map with String keys and Float values for input text. Then it will be transformed * to Lucene FeatureQuery wrapped by Lucene BooleanQuery. */ @@ -55,8 +56,8 @@ @Accessors(chain = true, fluent = true) @NoArgsConstructor @AllArgsConstructor -public class SparseEncodingQueryBuilder extends AbstractQueryBuilder { - public static final String NAME = "sparse_encoding"; +public class NeuralSparseQueryBuilder extends AbstractQueryBuilder { + public static final String NAME = "neural_sparse"; @VisibleForTesting static final ParseField QUERY_TEXT_FIELD = new ParseField("query_text"); @VisibleForTesting @@ -67,7 +68,7 @@ public class SparseEncodingQueryBuilder extends AbstractQueryBuilder queryTokens = queryTokensSupplier.get(); validateQueryTokens(queryTokens); - final Float scoreUpperBound = null != maxTokenScore ? maxTokenScore : Float.MAX_VALUE; + final Float scoreUpperBound = maxTokenScore != null ? maxTokenScore : Float.MAX_VALUE; BooleanQuery.Builder builder = new BooleanQuery.Builder(); for (Map.Entry entry : queryTokens.entrySet()) { @@ -272,7 +273,7 @@ private static void validateQueryTokens(Map queryTokens) { } @Override - protected boolean doEquals(SparseEncodingQueryBuilder obj) { + protected boolean doEquals(NeuralSparseQueryBuilder obj) { if (this == obj) return true; if (obj == null || getClass() != obj.getClass()) return false; EqualsBuilder equalsBuilder = new EqualsBuilder().append(fieldName, obj.fieldName) diff --git a/src/main/java/org/opensearch/neuralsearch/util/TokenWeightUtil.java b/src/main/java/org/opensearch/neuralsearch/util/TokenWeightUtil.java index 76ce0fa16..853fc743d 100644 --- a/src/main/java/org/opensearch/neuralsearch/util/TokenWeightUtil.java +++ b/src/main/java/org/opensearch/neuralsearch/util/TokenWeightUtil.java @@ -12,7 +12,7 @@ import java.util.stream.Collectors; /** - * Utility class for working with sparse_encoding queries and ingest processor. + * Utility class for working with neural_sparse queries and ingest processor. * Used to fetch the (token, weight) Map from the response returned by {@link org.opensearch.neuralsearch.ml.MLCommonsClientAccessor} * */ diff --git a/src/test/java/org/opensearch/neuralsearch/query/SparseEncodingQueryBuilderTests.java b/src/test/java/org/opensearch/neuralsearch/query/NeuralSparseQueryBuilderTests.java similarity index 83% rename from src/test/java/org/opensearch/neuralsearch/query/SparseEncodingQueryBuilderTests.java rename to src/test/java/org/opensearch/neuralsearch/query/NeuralSparseQueryBuilderTests.java index 12a348a47..34850dcb7 100644 --- a/src/test/java/org/opensearch/neuralsearch/query/SparseEncodingQueryBuilderTests.java +++ b/src/test/java/org/opensearch/neuralsearch/query/NeuralSparseQueryBuilderTests.java @@ -11,10 +11,10 @@ import static org.opensearch.index.query.AbstractQueryBuilder.BOOST_FIELD; import static org.opensearch.index.query.AbstractQueryBuilder.NAME_FIELD; import static org.opensearch.neuralsearch.TestUtils.xContentBuilderToMap; -import static org.opensearch.neuralsearch.query.SparseEncodingQueryBuilder.MAX_TOKEN_SCORE_FIELD; -import static org.opensearch.neuralsearch.query.SparseEncodingQueryBuilder.MODEL_ID_FIELD; -import static org.opensearch.neuralsearch.query.SparseEncodingQueryBuilder.NAME; -import static org.opensearch.neuralsearch.query.SparseEncodingQueryBuilder.QUERY_TEXT_FIELD; +import static org.opensearch.neuralsearch.query.NeuralSparseQueryBuilder.MAX_TOKEN_SCORE_FIELD; +import static org.opensearch.neuralsearch.query.NeuralSparseQueryBuilder.MODEL_ID_FIELD; +import static org.opensearch.neuralsearch.query.NeuralSparseQueryBuilder.NAME; +import static org.opensearch.neuralsearch.query.NeuralSparseQueryBuilder.QUERY_TEXT_FIELD; import java.io.IOException; import java.util.List; @@ -43,7 +43,7 @@ import org.opensearch.neuralsearch.ml.MLCommonsClientAccessor; import org.opensearch.test.OpenSearchTestCase; -public class SparseEncodingQueryBuilderTests extends OpenSearchTestCase { +public class NeuralSparseQueryBuilderTests extends OpenSearchTestCase { private static final String FIELD_NAME = "testField"; private static final String QUERY_TEXT = "Hello world!"; @@ -73,7 +73,7 @@ public void testFromXContent_whenBuiltWithQueryText_thenBuildSuccessfully() { XContentParser contentParser = createParser(xContentBuilder); contentParser.nextToken(); - SparseEncodingQueryBuilder sparseEncodingQueryBuilder = SparseEncodingQueryBuilder.fromXContent(contentParser); + NeuralSparseQueryBuilder sparseEncodingQueryBuilder = NeuralSparseQueryBuilder.fromXContent(contentParser); assertEquals(FIELD_NAME, sparseEncodingQueryBuilder.fieldName()); assertEquals(QUERY_TEXT, sparseEncodingQueryBuilder.queryText()); @@ -106,7 +106,7 @@ public void testFromXContent_whenBuiltWithOptionals_thenBuildSuccessfully() { XContentParser contentParser = createParser(xContentBuilder); contentParser.nextToken(); - SparseEncodingQueryBuilder sparseEncodingQueryBuilder = SparseEncodingQueryBuilder.fromXContent(contentParser); + NeuralSparseQueryBuilder sparseEncodingQueryBuilder = NeuralSparseQueryBuilder.fromXContent(contentParser); assertEquals(FIELD_NAME, sparseEncodingQueryBuilder.fieldName()); assertEquals(QUERY_TEXT, sparseEncodingQueryBuilder.queryText()); @@ -142,7 +142,7 @@ public void testFromXContent_whenBuildWithMultipleRootFields_thenFail() { XContentParser contentParser = createParser(xContentBuilder); contentParser.nextToken(); - expectThrows(ParsingException.class, () -> SparseEncodingQueryBuilder.fromXContent(contentParser)); + expectThrows(ParsingException.class, () -> NeuralSparseQueryBuilder.fromXContent(contentParser)); } @SneakyThrows @@ -163,7 +163,7 @@ public void testFromXContent_whenBuildWithMissingQuery_thenFail() { XContentParser contentParser = createParser(xContentBuilder); contentParser.nextToken(); - expectThrows(IllegalArgumentException.class, () -> SparseEncodingQueryBuilder.fromXContent(contentParser)); + expectThrows(IllegalArgumentException.class, () -> NeuralSparseQueryBuilder.fromXContent(contentParser)); } @SneakyThrows @@ -187,7 +187,7 @@ public void testFromXContent_whenBuildWithNegativeMaxTokenScore_thenFail() { XContentParser contentParser = createParser(xContentBuilder); contentParser.nextToken(); - expectThrows(IllegalArgumentException.class, () -> SparseEncodingQueryBuilder.fromXContent(contentParser)); + expectThrows(IllegalArgumentException.class, () -> NeuralSparseQueryBuilder.fromXContent(contentParser)); } @SneakyThrows @@ -208,7 +208,7 @@ public void testFromXContent_whenBuildWithMissingModelId_thenFail() { XContentParser contentParser = createParser(xContentBuilder); contentParser.nextToken(); - expectThrows(IllegalArgumentException.class, () -> SparseEncodingQueryBuilder.fromXContent(contentParser)); + expectThrows(IllegalArgumentException.class, () -> NeuralSparseQueryBuilder.fromXContent(contentParser)); } @SneakyThrows @@ -235,13 +235,13 @@ public void testFromXContent_whenBuildWithDuplicateParameters_thenFail() { XContentParser contentParser = createParser(xContentBuilder); contentParser.nextToken(); - expectThrows(IOException.class, () -> SparseEncodingQueryBuilder.fromXContent(contentParser)); + expectThrows(IOException.class, () -> NeuralSparseQueryBuilder.fromXContent(contentParser)); } @SuppressWarnings("unchecked") @SneakyThrows public void testToXContent() { - SparseEncodingQueryBuilder sparseEncodingQueryBuilder = new SparseEncodingQueryBuilder().fieldName(FIELD_NAME) + NeuralSparseQueryBuilder sparseEncodingQueryBuilder = new NeuralSparseQueryBuilder().fieldName(FIELD_NAME) .modelId(MODEL_ID) .queryText(QUERY_TEXT) .maxTokenScore(MAX_TOKEN_SCORE); @@ -274,7 +274,7 @@ public void testToXContent() { @SneakyThrows public void testStreams() { - SparseEncodingQueryBuilder original = new SparseEncodingQueryBuilder(); + NeuralSparseQueryBuilder original = new NeuralSparseQueryBuilder(); original.fieldName(FIELD_NAME); original.queryText(QUERY_TEXT); original.maxTokenScore(MAX_TOKEN_SCORE); @@ -292,7 +292,7 @@ public void testStreams() { ) ); - SparseEncodingQueryBuilder copy = new SparseEncodingQueryBuilder(filterStreamInput); + NeuralSparseQueryBuilder copy = new NeuralSparseQueryBuilder(filterStreamInput); assertEquals(original, copy); } @@ -310,7 +310,7 @@ public void testHashAndEquals() { String queryName1 = "query-1"; String queryName2 = "query-2"; - SparseEncodingQueryBuilder sparseEncodingQueryBuilder_baseline = new SparseEncodingQueryBuilder().fieldName(fieldName1) + NeuralSparseQueryBuilder sparseEncodingQueryBuilder_baseline = new NeuralSparseQueryBuilder().fieldName(fieldName1) .queryText(queryText1) .modelId(modelId1) .maxTokenScore(maxTokenScore1) @@ -318,7 +318,7 @@ public void testHashAndEquals() { .queryName(queryName1); // Identical to sparseEncodingQueryBuilder_baseline - SparseEncodingQueryBuilder sparseEncodingQueryBuilder_baselineCopy = new SparseEncodingQueryBuilder().fieldName(fieldName1) + NeuralSparseQueryBuilder sparseEncodingQueryBuilder_baselineCopy = new NeuralSparseQueryBuilder().fieldName(fieldName1) .queryText(queryText1) .modelId(modelId1) .maxTokenScore(maxTokenScore1) @@ -326,12 +326,13 @@ public void testHashAndEquals() { .queryName(queryName1); // Identical to sparseEncodingQueryBuilder_baseline except default boost and query name - SparseEncodingQueryBuilder sparseEncodingQueryBuilder_defaultBoostAndQueryName = new SparseEncodingQueryBuilder().fieldName( - fieldName1 - ).queryText(queryText1).modelId(modelId1).maxTokenScore(maxTokenScore1); + NeuralSparseQueryBuilder sparseEncodingQueryBuilder_defaultBoostAndQueryName = new NeuralSparseQueryBuilder().fieldName(fieldName1) + .queryText(queryText1) + .modelId(modelId1) + .maxTokenScore(maxTokenScore1); // Identical to sparseEncodingQueryBuilder_baseline except diff field name - SparseEncodingQueryBuilder sparseEncodingQueryBuilder_diffFieldName = new SparseEncodingQueryBuilder().fieldName(fieldName2) + NeuralSparseQueryBuilder sparseEncodingQueryBuilder_diffFieldName = new NeuralSparseQueryBuilder().fieldName(fieldName2) .queryText(queryText1) .modelId(modelId1) .maxTokenScore(maxTokenScore1) @@ -339,7 +340,7 @@ public void testHashAndEquals() { .queryName(queryName1); // Identical to sparseEncodingQueryBuilder_baseline except diff query text - SparseEncodingQueryBuilder sparseEncodingQueryBuilder_diffQueryText = new SparseEncodingQueryBuilder().fieldName(fieldName1) + NeuralSparseQueryBuilder sparseEncodingQueryBuilder_diffQueryText = new NeuralSparseQueryBuilder().fieldName(fieldName1) .queryText(queryText2) .modelId(modelId1) .maxTokenScore(maxTokenScore1) @@ -347,7 +348,7 @@ public void testHashAndEquals() { .queryName(queryName1); // Identical to sparseEncodingQueryBuilder_baseline except diff model ID - SparseEncodingQueryBuilder sparseEncodingQueryBuilder_diffModelId = new SparseEncodingQueryBuilder().fieldName(fieldName1) + NeuralSparseQueryBuilder sparseEncodingQueryBuilder_diffModelId = new NeuralSparseQueryBuilder().fieldName(fieldName1) .queryText(queryText1) .modelId(modelId2) .maxTokenScore(maxTokenScore1) @@ -355,7 +356,7 @@ public void testHashAndEquals() { .queryName(queryName1); // Identical to sparseEncodingQueryBuilder_baseline except diff boost - SparseEncodingQueryBuilder sparseEncodingQueryBuilder_diffBoost = new SparseEncodingQueryBuilder().fieldName(fieldName1) + NeuralSparseQueryBuilder sparseEncodingQueryBuilder_diffBoost = new NeuralSparseQueryBuilder().fieldName(fieldName1) .queryText(queryText1) .modelId(modelId1) .maxTokenScore(maxTokenScore1) @@ -363,7 +364,7 @@ public void testHashAndEquals() { .queryName(queryName1); // Identical to sparseEncodingQueryBuilder_baseline except diff query name - SparseEncodingQueryBuilder sparseEncodingQueryBuilder_diffQueryName = new SparseEncodingQueryBuilder().fieldName(fieldName1) + NeuralSparseQueryBuilder sparseEncodingQueryBuilder_diffQueryName = new NeuralSparseQueryBuilder().fieldName(fieldName1) .queryText(queryText1) .modelId(modelId1) .maxTokenScore(maxTokenScore1) @@ -371,7 +372,7 @@ public void testHashAndEquals() { .queryName(queryName2); // Identical to sparseEncodingQueryBuilder_baseline except diff max token score - SparseEncodingQueryBuilder sparseEncodingQueryBuilder_diffMaxTokenScore = new SparseEncodingQueryBuilder().fieldName(fieldName1) + NeuralSparseQueryBuilder sparseEncodingQueryBuilder_diffMaxTokenScore = new NeuralSparseQueryBuilder().fieldName(fieldName1) .queryText(queryText1) .modelId(modelId1) .maxTokenScore(maxTokenScore2) @@ -408,7 +409,7 @@ public void testHashAndEquals() { @SneakyThrows public void testRewrite_whenQueryTokensSupplierNull_thenSetQueryTokensSupplier() { - SparseEncodingQueryBuilder sparseEncodingQueryBuilder = new SparseEncodingQueryBuilder().fieldName(FIELD_NAME) + NeuralSparseQueryBuilder sparseEncodingQueryBuilder = new NeuralSparseQueryBuilder().fieldName(FIELD_NAME) .queryText(QUERY_TEXT) .modelId(MODEL_ID); Map expectedMap = Map.of("1", 1f, "2", 2f); @@ -418,7 +419,7 @@ public void testRewrite_whenQueryTokensSupplierNull_thenSetQueryTokensSupplier() listener.onResponse(List.of(Map.of("response", List.of(expectedMap)))); return null; }).when(mlCommonsClientAccessor).inferenceSentencesWithMapResult(any(), any(), any()); - SparseEncodingQueryBuilder.initialize(mlCommonsClientAccessor); + NeuralSparseQueryBuilder.initialize(mlCommonsClientAccessor); final CountDownLatch inProgressLatch = new CountDownLatch(1); QueryRewriteContext queryRewriteContext = mock(QueryRewriteContext.class); @@ -434,7 +435,7 @@ public void testRewrite_whenQueryTokensSupplierNull_thenSetQueryTokensSupplier() return null; }).when(queryRewriteContext).registerAsyncAction(any()); - SparseEncodingQueryBuilder queryBuilder = (SparseEncodingQueryBuilder) sparseEncodingQueryBuilder.doRewrite(queryRewriteContext); + NeuralSparseQueryBuilder queryBuilder = (NeuralSparseQueryBuilder) sparseEncodingQueryBuilder.doRewrite(queryRewriteContext); assertNotNull(queryBuilder.queryTokensSupplier()); assertTrue(inProgressLatch.await(5, TimeUnit.SECONDS)); assertEquals(expectedMap, queryBuilder.queryTokensSupplier().get()); @@ -442,7 +443,7 @@ public void testRewrite_whenQueryTokensSupplierNull_thenSetQueryTokensSupplier() @SneakyThrows public void testRewrite_whenQueryTokensSupplierSet_thenReturnSelf() { - SparseEncodingQueryBuilder sparseEncodingQueryBuilder = new SparseEncodingQueryBuilder().fieldName(FIELD_NAME) + NeuralSparseQueryBuilder sparseEncodingQueryBuilder = new NeuralSparseQueryBuilder().fieldName(FIELD_NAME) .queryText(QUERY_TEXT) .modelId(MODEL_ID) .queryTokensSupplier(QUERY_TOKENS_SUPPLIER); diff --git a/src/test/java/org/opensearch/neuralsearch/query/SparseEncodingQueryIT.java b/src/test/java/org/opensearch/neuralsearch/query/NeuralSparseQueryIT.java similarity index 70% rename from src/test/java/org/opensearch/neuralsearch/query/SparseEncodingQueryIT.java rename to src/test/java/org/opensearch/neuralsearch/query/NeuralSparseQueryIT.java index 2890cd5aa..672ab2940 100644 --- a/src/test/java/org/opensearch/neuralsearch/query/SparseEncodingQueryIT.java +++ b/src/test/java/org/opensearch/neuralsearch/query/NeuralSparseQueryIT.java @@ -21,16 +21,16 @@ import org.opensearch.neuralsearch.TestUtils; import org.opensearch.neuralsearch.common.BaseSparseEncodingIT; -public class SparseEncodingQueryIT extends BaseSparseEncodingIT { +public class NeuralSparseQueryIT extends BaseSparseEncodingIT { private static final String TEST_BASIC_INDEX_NAME = "test-sparse-basic-index"; - private static final String TEST_MULTI_SPARSE_ENCODING_FIELD_INDEX_NAME = "test-sparse-multi-field-index"; - private static final String TEST_TEXT_AND_SPARSE_ENCODING_FIELD_INDEX_NAME = "test-sparse-text-and-field-index"; + private static final String TEST_MULTI_NEURAL_SPARSE_FIELD_INDEX_NAME = "test-sparse-multi-field-index"; + private static final String TEST_TEXT_AND_NEURAL_SPARSE_FIELD_INDEX_NAME = "test-sparse-text-and-field-index"; private static final String TEST_NESTED_INDEX_NAME = "test-sparse-nested-index"; private static final String TEST_QUERY_TEXT = "Hello world a b"; - private static final String TEST_SPARSE_ENCODING_FIELD_NAME_1 = "test-sparse-encoding-1"; - private static final String TEST_SPARSE_ENCODING_FIELD_NAME_2 = "test-sparse-encoding-2"; + private static final String TEST_NEURAL_SPARSE_FIELD_NAME_1 = "test-sparse-encoding-1"; + private static final String TEST_NEURAL_SPARSE_FIELD_NAME_2 = "test-sparse-encoding-2"; private static final String TEST_TEXT_FIELD_NAME_1 = "test-text-field"; - private static final String TEST_SPARSE_ENCODING_FIELD_NAME_NESTED = "nested.sparse_encoding.field"; + private static final String TEST_NEURAL_SPARSE_FIELD_NAME_NESTED = "nested.neural_sparse.field"; private static final List TEST_TOKENS = List.of("hello", "world", "a", "b", "c"); @@ -55,7 +55,7 @@ public void tearDown() { * Tests basic query: * { * "query": { - * "sparse_encoding": { + * "neural_sparse": { * "text_sparse": { * "query_text": "Hello world a b", * "model_id": "dcsdcasd" @@ -68,9 +68,9 @@ public void tearDown() { public void testBasicQueryUsingQueryText() { initializeIndexIfNotExist(TEST_BASIC_INDEX_NAME); String modelId = getDeployedModelId(); - SparseEncodingQueryBuilder sparseEncodingQueryBuilder = new SparseEncodingQueryBuilder().fieldName( - TEST_SPARSE_ENCODING_FIELD_NAME_1 - ).queryText(TEST_QUERY_TEXT).modelId(modelId); + NeuralSparseQueryBuilder sparseEncodingQueryBuilder = new NeuralSparseQueryBuilder().fieldName(TEST_NEURAL_SPARSE_FIELD_NAME_1) + .queryText(TEST_QUERY_TEXT) + .modelId(modelId); Map searchResponseAsMap = search(TEST_BASIC_INDEX_NAME, sparseEncodingQueryBuilder, 1); Map firstInnerHit = getFirstInnerHit(searchResponseAsMap); @@ -83,7 +83,7 @@ public void testBasicQueryUsingQueryText() { * Tests basic query: * { * "query": { - * "sparse_encoding": { + * "neural_sparse": { * "text_sparse": { * "query_text": "Hello world a b", * "model_id": "dcsdcasd", @@ -98,9 +98,10 @@ public void testBasicQueryWithMaxTokenScore() { float maxTokenScore = 0.00001f; initializeIndexIfNotExist(TEST_BASIC_INDEX_NAME); String modelId = getDeployedModelId(); - SparseEncodingQueryBuilder sparseEncodingQueryBuilder = new SparseEncodingQueryBuilder().fieldName( - TEST_SPARSE_ENCODING_FIELD_NAME_1 - ).queryText(TEST_QUERY_TEXT).modelId(modelId).maxTokenScore(maxTokenScore); + NeuralSparseQueryBuilder sparseEncodingQueryBuilder = new NeuralSparseQueryBuilder().fieldName(TEST_NEURAL_SPARSE_FIELD_NAME_1) + .queryText(TEST_QUERY_TEXT) + .modelId(modelId) + .maxTokenScore(maxTokenScore); Map searchResponseAsMap = search(TEST_BASIC_INDEX_NAME, sparseEncodingQueryBuilder, 1); Map firstInnerHit = getFirstInnerHit(searchResponseAsMap); @@ -122,7 +123,7 @@ public void testBasicQueryWithMaxTokenScore() { * Tests basic query: * { * "query": { - * "sparse_encoding": { + * "neural_sparse": { * "text_sparse": { * "query_text": "Hello world a b", * "model_id": "dcsdcasd", @@ -136,9 +137,10 @@ public void testBasicQueryWithMaxTokenScore() { public void testBoostQuery() { initializeIndexIfNotExist(TEST_BASIC_INDEX_NAME); String modelId = getDeployedModelId(); - SparseEncodingQueryBuilder sparseEncodingQueryBuilder = new SparseEncodingQueryBuilder().fieldName( - TEST_SPARSE_ENCODING_FIELD_NAME_1 - ).queryText(TEST_QUERY_TEXT).modelId(modelId).boost(2.0f); + NeuralSparseQueryBuilder sparseEncodingQueryBuilder = new NeuralSparseQueryBuilder().fieldName(TEST_NEURAL_SPARSE_FIELD_NAME_1) + .queryText(TEST_QUERY_TEXT) + .modelId(modelId) + .boost(2.0f); Map searchResponseAsMap = search(TEST_BASIC_INDEX_NAME, sparseEncodingQueryBuilder, 1); Map firstInnerHit = getFirstInnerHit(searchResponseAsMap); @@ -156,7 +158,7 @@ public void testBoostQuery() { * "rescore": { * "query": { * "rescore_query": { - * "sparse_encoding": { + * "neural_sparse": { * "text_sparse": { * * "query_text": "Hello world a b", * * "model_id": "dcsdcasd" @@ -172,9 +174,9 @@ public void testRescoreQuery() { initializeIndexIfNotExist(TEST_BASIC_INDEX_NAME); String modelId = getDeployedModelId(); MatchAllQueryBuilder matchAllQueryBuilder = new MatchAllQueryBuilder(); - SparseEncodingQueryBuilder sparseEncodingQueryBuilder = new SparseEncodingQueryBuilder().fieldName( - TEST_SPARSE_ENCODING_FIELD_NAME_1 - ).queryText(TEST_QUERY_TEXT).modelId(modelId); + NeuralSparseQueryBuilder sparseEncodingQueryBuilder = new NeuralSparseQueryBuilder().fieldName(TEST_NEURAL_SPARSE_FIELD_NAME_1) + .queryText(TEST_QUERY_TEXT) + .modelId(modelId); Map searchResponseAsMap = search(TEST_BASIC_INDEX_NAME, matchAllQueryBuilder, sparseEncodingQueryBuilder, 1); Map firstInnerHit = getFirstInnerHit(searchResponseAsMap); @@ -189,13 +191,13 @@ public void testRescoreQuery() { * "query": { * "bool" : { * "should": [ - * "sparse_encoding": { + * "neural_sparse": { * "field1": { * "query_text": "Hello world a b", * "model_id": "dcsdcasd" * } * }, - * "sparse_encoding": { + * "neural_sparse": { * "field2": { * "query_text": "Hello world a b", * "model_id": "dcsdcasd" @@ -208,20 +210,20 @@ public void testRescoreQuery() { */ @SneakyThrows public void testBooleanQuery_withMultipleSparseEncodingQueries() { - initializeIndexIfNotExist(TEST_MULTI_SPARSE_ENCODING_FIELD_INDEX_NAME); + initializeIndexIfNotExist(TEST_MULTI_NEURAL_SPARSE_FIELD_INDEX_NAME); String modelId = getDeployedModelId(); BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder(); - SparseEncodingQueryBuilder sparseEncodingQueryBuilder1 = new SparseEncodingQueryBuilder().fieldName( - TEST_SPARSE_ENCODING_FIELD_NAME_1 - ).queryText(TEST_QUERY_TEXT).modelId(modelId); - SparseEncodingQueryBuilder sparseEncodingQueryBuilder2 = new SparseEncodingQueryBuilder().fieldName( - TEST_SPARSE_ENCODING_FIELD_NAME_2 - ).queryText(TEST_QUERY_TEXT).modelId(modelId); + NeuralSparseQueryBuilder sparseEncodingQueryBuilder1 = new NeuralSparseQueryBuilder().fieldName(TEST_NEURAL_SPARSE_FIELD_NAME_1) + .queryText(TEST_QUERY_TEXT) + .modelId(modelId); + NeuralSparseQueryBuilder sparseEncodingQueryBuilder2 = new NeuralSparseQueryBuilder().fieldName(TEST_NEURAL_SPARSE_FIELD_NAME_2) + .queryText(TEST_QUERY_TEXT) + .modelId(modelId); boolQueryBuilder.should(sparseEncodingQueryBuilder1).should(sparseEncodingQueryBuilder2); - Map searchResponseAsMap = search(TEST_MULTI_SPARSE_ENCODING_FIELD_INDEX_NAME, boolQueryBuilder, 1); + Map searchResponseAsMap = search(TEST_MULTI_NEURAL_SPARSE_FIELD_INDEX_NAME, boolQueryBuilder, 1); Map firstInnerHit = getFirstInnerHit(searchResponseAsMap); assertEquals("1", firstInnerHit.get("_id")); @@ -235,13 +237,13 @@ public void testBooleanQuery_withMultipleSparseEncodingQueries() { * "query": { * "bool" : { * "should": [ - * "sparse_encoding": { + * "neural_sparse": { * "field1": { * "query_text": "Hello world a b", * "model_id": "dcsdcasd" * } * }, - * "sparse_encoding": { + * "neural_sparse": { * "field2": { * "query_text": "Hello world a b", * "model_id": "dcsdcasd" @@ -254,17 +256,17 @@ public void testBooleanQuery_withMultipleSparseEncodingQueries() { */ @SneakyThrows public void testBooleanQuery_withSparseEncodingAndBM25Queries() { - initializeIndexIfNotExist(TEST_TEXT_AND_SPARSE_ENCODING_FIELD_INDEX_NAME); + initializeIndexIfNotExist(TEST_TEXT_AND_NEURAL_SPARSE_FIELD_INDEX_NAME); String modelId = getDeployedModelId(); BoolQueryBuilder boolQueryBuilder = new BoolQueryBuilder(); - SparseEncodingQueryBuilder sparseEncodingQueryBuilder = new SparseEncodingQueryBuilder().fieldName( - TEST_SPARSE_ENCODING_FIELD_NAME_1 - ).queryText(TEST_QUERY_TEXT).modelId(modelId); + NeuralSparseQueryBuilder sparseEncodingQueryBuilder = new NeuralSparseQueryBuilder().fieldName(TEST_NEURAL_SPARSE_FIELD_NAME_1) + .queryText(TEST_QUERY_TEXT) + .modelId(modelId); MatchQueryBuilder matchQueryBuilder = new MatchQueryBuilder(TEST_TEXT_FIELD_NAME_1, TEST_QUERY_TEXT); boolQueryBuilder.should(sparseEncodingQueryBuilder).should(matchQueryBuilder); - Map searchResponseAsMap = search(TEST_TEXT_AND_SPARSE_ENCODING_FIELD_INDEX_NAME, boolQueryBuilder, 1); + Map searchResponseAsMap = search(TEST_TEXT_AND_NEURAL_SPARSE_FIELD_INDEX_NAME, boolQueryBuilder, 1); Map firstInnerHit = getFirstInnerHit(searchResponseAsMap); assertEquals("1", firstInnerHit.get("_id")); @@ -274,41 +276,41 @@ public void testBooleanQuery_withSparseEncodingAndBM25Queries() { @SneakyThrows public void testBasicQueryUsingQueryText_whenQueryWrongFieldType_thenFail() { - initializeIndexIfNotExist(TEST_TEXT_AND_SPARSE_ENCODING_FIELD_INDEX_NAME); + initializeIndexIfNotExist(TEST_TEXT_AND_NEURAL_SPARSE_FIELD_INDEX_NAME); String modelId = getDeployedModelId(); - SparseEncodingQueryBuilder sparseEncodingQueryBuilder = new SparseEncodingQueryBuilder().fieldName(TEST_TEXT_FIELD_NAME_1) + NeuralSparseQueryBuilder sparseEncodingQueryBuilder = new NeuralSparseQueryBuilder().fieldName(TEST_TEXT_FIELD_NAME_1) .queryText(TEST_QUERY_TEXT) .modelId(modelId); - expectThrows(ResponseException.class, () -> search(TEST_TEXT_AND_SPARSE_ENCODING_FIELD_INDEX_NAME, sparseEncodingQueryBuilder, 1)); + expectThrows(ResponseException.class, () -> search(TEST_TEXT_AND_NEURAL_SPARSE_FIELD_INDEX_NAME, sparseEncodingQueryBuilder, 1)); } @SneakyThrows protected void initializeIndexIfNotExist(String indexName) { if (TEST_BASIC_INDEX_NAME.equals(indexName) && !indexExists(indexName)) { - prepareSparseEncodingIndex(indexName, List.of(TEST_SPARSE_ENCODING_FIELD_NAME_1)); - addSparseEncodingDoc(indexName, "1", List.of(TEST_SPARSE_ENCODING_FIELD_NAME_1), List.of(testRankFeaturesDoc)); + prepareSparseEncodingIndex(indexName, List.of(TEST_NEURAL_SPARSE_FIELD_NAME_1)); + addSparseEncodingDoc(indexName, "1", List.of(TEST_NEURAL_SPARSE_FIELD_NAME_1), List.of(testRankFeaturesDoc)); assertEquals(1, getDocCount(indexName)); } - if (TEST_MULTI_SPARSE_ENCODING_FIELD_INDEX_NAME.equals(indexName) && !indexExists(indexName)) { - prepareSparseEncodingIndex(indexName, List.of(TEST_SPARSE_ENCODING_FIELD_NAME_1, TEST_SPARSE_ENCODING_FIELD_NAME_2)); + if (TEST_MULTI_NEURAL_SPARSE_FIELD_INDEX_NAME.equals(indexName) && !indexExists(indexName)) { + prepareSparseEncodingIndex(indexName, List.of(TEST_NEURAL_SPARSE_FIELD_NAME_1, TEST_NEURAL_SPARSE_FIELD_NAME_2)); addSparseEncodingDoc( indexName, "1", - List.of(TEST_SPARSE_ENCODING_FIELD_NAME_1, TEST_SPARSE_ENCODING_FIELD_NAME_2), + List.of(TEST_NEURAL_SPARSE_FIELD_NAME_1, TEST_NEURAL_SPARSE_FIELD_NAME_2), List.of(testRankFeaturesDoc, testRankFeaturesDoc) ); assertEquals(1, getDocCount(indexName)); } - if (TEST_TEXT_AND_SPARSE_ENCODING_FIELD_INDEX_NAME.equals(indexName) && !indexExists(indexName)) { - prepareSparseEncodingIndex(indexName, List.of(TEST_SPARSE_ENCODING_FIELD_NAME_1)); + if (TEST_TEXT_AND_NEURAL_SPARSE_FIELD_INDEX_NAME.equals(indexName) && !indexExists(indexName)) { + prepareSparseEncodingIndex(indexName, List.of(TEST_NEURAL_SPARSE_FIELD_NAME_1)); addSparseEncodingDoc( indexName, "1", - List.of(TEST_SPARSE_ENCODING_FIELD_NAME_1), + List.of(TEST_NEURAL_SPARSE_FIELD_NAME_1), List.of(testRankFeaturesDoc), List.of(TEST_TEXT_FIELD_NAME_1), List.of(TEST_QUERY_TEXT) @@ -317,8 +319,8 @@ protected void initializeIndexIfNotExist(String indexName) { } if (TEST_NESTED_INDEX_NAME.equals(indexName) && !indexExists(indexName)) { - prepareSparseEncodingIndex(indexName, List.of(TEST_SPARSE_ENCODING_FIELD_NAME_NESTED)); - addSparseEncodingDoc(indexName, "1", List.of(TEST_SPARSE_ENCODING_FIELD_NAME_NESTED), List.of(testRankFeaturesDoc)); + prepareSparseEncodingIndex(indexName, List.of(TEST_NEURAL_SPARSE_FIELD_NAME_NESTED)); + addSparseEncodingDoc(indexName, "1", List.of(TEST_NEURAL_SPARSE_FIELD_NAME_NESTED), List.of(testRankFeaturesDoc)); assertEquals(1, getDocCount(TEST_NESTED_INDEX_NAME)); } } diff --git a/src/test/resources/processor/SparseEncodingIndexMappings.json b/src/test/resources/processor/SparseEncodingIndexMappings.json index 87dee278e..9748e8f3d 100644 --- a/src/test/resources/processor/SparseEncodingIndexMappings.json +++ b/src/test/resources/processor/SparseEncodingIndexMappings.json @@ -23,4 +23,4 @@ } } } -} \ No newline at end of file +} diff --git a/src/test/resources/processor/SparseEncodingPipelineConfiguration.json b/src/test/resources/processor/SparseEncodingPipelineConfiguration.json index 82d13c8fe..04a4baf80 100644 --- a/src/test/resources/processor/SparseEncodingPipelineConfiguration.json +++ b/src/test/resources/processor/SparseEncodingPipelineConfiguration.json @@ -15,4 +15,4 @@ } } ] -} \ No newline at end of file +} diff --git a/src/test/resources/processor/UploadSparseEncodingModelRequestBody.json b/src/test/resources/processor/UploadSparseEncodingModelRequestBody.json index c45334bae..50b4b8a9b 100644 --- a/src/test/resources/processor/UploadSparseEncodingModelRequestBody.json +++ b/src/test/resources/processor/UploadSparseEncodingModelRequestBody.json @@ -7,4 +7,4 @@ "model_group_id": "", "model_content_hash_value": "b345e9e943b62c405a8dd227ef2c46c84c5ff0a0b71b584be9132b37bce91a9a", "url": "https://github.com/opensearch-project/ml-commons/raw/main/ml-algorithms/src/test/resources/org/opensearch/ml/engine/algorithms/sparse_encoding/sparse_demo.zip" -} \ No newline at end of file +}