-
Notifications
You must be signed in to change notification settings - Fork 129
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merge efficient filtering from feature branch #588
Merged
Merged
Changes from 7 commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
4c8cf93
Adding efficient filtering (#515)
martin-gaievski 47b9ad4
Adding more tests and logs (#538)
martin-gaievski 2e18ae8
Adding serialization for filter field in KnnQueryBuilder (#564)
martin-gaievski 9b32e17
Read min cluster version directly from DiscoveryNodes (#581)
martin-gaievski 44f10de
Refactor kNN codec related classes (#582)
martin-gaievski 52e2b6b
Adding stat for query with filter (#587)
martin-gaievski a2b92b1
Rename context class, adjust lucene IT
martin-gaievski d015c35
Adding code comments
martin-gaievski File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
86 changes: 86 additions & 0 deletions
86
qa/rolling-upgrade/src/test/java/org/opensearch/knn/bwc/LuceneFilteringIT.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,86 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.knn.bwc; | ||
|
||
import org.hamcrest.MatcherAssert; | ||
import org.opensearch.knn.TestUtils; | ||
import org.opensearch.knn.index.query.KNNQueryBuilder; | ||
import org.opensearch.index.query.QueryBuilders; | ||
import org.opensearch.index.query.TermQueryBuilder; | ||
|
||
import org.opensearch.client.Request; | ||
import org.opensearch.client.ResponseException; | ||
import org.opensearch.common.Strings; | ||
import org.opensearch.common.xcontent.ToXContent; | ||
import org.opensearch.common.xcontent.XContentBuilder; | ||
import org.opensearch.common.xcontent.XContentFactory; | ||
|
||
import java.io.IOException; | ||
|
||
import static org.hamcrest.CoreMatchers.anyOf; | ||
import static org.hamcrest.CoreMatchers.containsString; | ||
import static org.opensearch.knn.TestUtils.NODES_BWC_CLUSTER; | ||
import static org.opensearch.knn.common.KNNConstants.LUCENE_NAME; | ||
import static org.opensearch.knn.common.KNNConstants.METHOD_HNSW; | ||
|
||
/** | ||
* Tests scenarios specific to filtering functionality in k-NN in case Lucene is set as an engine. | ||
* <p> | ||
* The single test branches on {@code getClusterType()} (OLD, MIXED, UPGRADED): a k-NN query that carries | ||
* a filter is expected to be rejected in the OLD and MIXED branches, and is simply executed in the | ||
* UPGRADED branch. | ||
*/ | ||
public class LuceneFilteringIT extends AbstractRollingUpgradeTestCase { | ||
// Name of the vector field used for the index mapping, the indexed documents and the queries. | ||
private static final String TEST_FIELD = "test-field"; | ||
// Dimension passed to the index mapping and to the vector generators. | ||
private static final int DIMENSIONS = 50; | ||
// Number of neighbours requested by the k-NN query; also sent as the "size" request parameter. | ||
private static final int K = 10; | ||
// Number of documents bulk-indexed in the OLD branch. | ||
private static final int NUM_DOCS = 100; | ||
// Term filter attached to every k-NN query in this test; it matches on _id "100". | ||
private static final TermQueryBuilder TERM_QUERY = QueryBuilders.termQuery("_id", "100"); | ||
|
||
// Runs a filtered k-NN query against the test index in each cluster stage. | ||
// OLD: creates the index (METHOD_HNSW / LUCENE_NAME mapping), bulk-adds NUM_DOCS vectors, then expects the filtered search to fail. | ||
// MIXED: expects the filtered search to fail. | ||
// UPGRADED: executes the filtered search (its result is not inspected here) and deletes the index. | ||
public void testLuceneFiltering() throws Exception { | ||
waitForClusterHealthGreen(NODES_BWC_CLUSTER); | ||
float[] queryVector = TestUtils.getQueryVectors(1, DIMENSIONS, NUM_DOCS, true)[0]; | ||
switch (getClusterType()) { | ||
case OLD: | ||
createKnnIndex( | ||
testIndex, | ||
getKNNDefaultIndexSettings(), | ||
createKnnIndexMapping(TEST_FIELD, DIMENSIONS, METHOD_HNSW, LUCENE_NAME) | ||
); | ||
bulkAddKnnDocs(testIndex, TEST_FIELD, TestUtils.getIndexVectors(NUM_DOCS, DIMENSIONS, true), NUM_DOCS); | ||
validateSearchKNNIndexFailed(testIndex, new KNNQueryBuilder(TEST_FIELD, queryVector, K, TERM_QUERY), K); | ||
break; | ||
case MIXED: | ||
validateSearchKNNIndexFailed(testIndex, new KNNQueryBuilder(TEST_FIELD, queryVector, K, TERM_QUERY), K); | ||
break; | ||
case UPGRADED: | ||
searchKNNIndex(testIndex, new KNNQueryBuilder(TEST_FIELD, queryVector, K, TERM_QUERY), K); | ||
deleteKNNIndex(testIndex); | ||
break; | ||
} | ||
} | ||
|
||
// Sends the given k-NN query to the index's _search endpoint and asserts that the request is rejected | ||
// with a ResponseException whose message contains one of the two expected texts. | ||
// index: name of the index to search; knnQueryBuilder: query to serialize; resultSize: value of the "size" parameter. | ||
private void validateSearchKNNIndexFailed(String index, KNNQueryBuilder knnQueryBuilder, int resultSize) throws IOException { | ||
// Wrap the serialized k-NN query in an outer object under the "query" key. | ||
XContentBuilder builder = XContentFactory.jsonBuilder().startObject().startObject("query"); | ||
knnQueryBuilder.doXContent(builder, ToXContent.EMPTY_PARAMS); | ||
builder.endObject().endObject(); | ||
|
||
Request request = new Request("POST", "/" + index + "/_search"); | ||
|
||
request.addParameter("size", Integer.toString(resultSize)); | ||
request.addParameter("explain", Boolean.toString(true)); | ||
request.addParameter("search_type", "query_then_fetch"); | ||
request.setJsonEntity(Strings.toString(builder)); | ||
|
||
Exception exception = expectThrows(ResponseException.class, () -> client().performRequest(request)); | ||
// Assert on two possible exception messages. The first one can come from the current version in case the serialized request is | ||
// coming from a lower version; | ||
// the second one is the reverse (vice versa) case, when a lower-version node receives a request with the filter field from a higher version. | ||
MatcherAssert.assertThat( | ||
exception.getMessage(), | ||
anyOf( | ||
containsString("filter field is supported from version"), | ||
containsString("[knn] unknown token [START_OBJECT] after [filter]") | ||
) | ||
); | ||
} | ||
} |
58 changes: 58 additions & 0 deletions
58
src/main/java/org/opensearch/knn/index/KNNClusterUtil.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.knn.index; | ||
|
||
import lombok.AccessLevel; | ||
import lombok.NoArgsConstructor; | ||
import lombok.extern.log4j.Log4j2; | ||
import org.opensearch.Version; | ||
import org.opensearch.cluster.service.ClusterService; | ||
|
||
/** | ||
* Class abstracts information related to underlying OpenSearch cluster. | ||
* <p> | ||
* A single shared instance is created lazily by {@link #instance()}; the Lombok-generated private | ||
* no-args constructor prevents construction from outside this class. The instance only becomes useful | ||
* after {@link #initialize(ClusterService)} has supplied a cluster service. | ||
*/ | ||
@NoArgsConstructor(access = AccessLevel.PRIVATE) | ||
@Log4j2 | ||
public class KNNClusterUtil { | ||
|
||
// Assigned by initialize(); has no initializer, so it is null until then. | ||
private ClusterService clusterService; | ||
// Shared instance, created on the first call to instance(). | ||
private static KNNClusterUtil instance; | ||
|
||
/** | ||
* Return instance of the cluster context, must be initialized first for proper usage. | ||
* Creation is guarded by the method-level {@code synchronized}, so at most one instance is created. | ||
* @return instance of cluster context | ||
*/ | ||
public static synchronized KNNClusterUtil instance() { | ||
if (instance == null) { | ||
instance = new KNNClusterUtil(); | ||
} | ||
return instance; | ||
} | ||
|
||
/** | ||
* Initializes instance of cluster context by injecting dependencies. | ||
* NOTE(review): unlike {@link #instance()}, this method is not synchronized and the field is not | ||
* volatile; presumably it is called once during plugin startup before any reader - confirm. | ||
* @param clusterService cluster service later used to read the cluster state | ||
*/ | ||
public void initialize(final ClusterService clusterService) { | ||
this.clusterService = clusterService; | ||
} | ||
|
||
/** | ||
* Return minimal OpenSearch version based on all nodes currently discoverable in the cluster. | ||
* Any exception raised while reading the cluster state, including the case where the cluster service | ||
* has not been set yet, is logged at error level and {@code Version.CURRENT} is returned instead. | ||
* @return minimal installed OpenSearch version, default to Version.CURRENT which is typically the latest version | ||
*/ | ||
public Version getClusterMinVersion() { | ||
try { | ||
return this.clusterService.state().getNodes().getMinNodeVersion(); | ||
} catch (Exception exception) { | ||
// Deliberate best-effort fallback: never propagate the failure to the caller. | ||
log.error( | ||
String.format("Failed to get cluster minimum node version, returning current node version %s instead.", Version.CURRENT), | ||
exception | ||
); | ||
return Version.CURRENT; | ||
} | ||
} | ||
} |
79 changes: 79 additions & 0 deletions
79
src/main/java/org/opensearch/knn/index/codec/BasePerFieldKnnVectorsFormat.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.knn.index.codec; | ||
|
||
import lombok.AllArgsConstructor; | ||
import lombok.extern.log4j.Log4j2; | ||
import org.apache.lucene.codecs.KnnVectorsFormat; | ||
import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; | ||
import org.opensearch.index.mapper.MapperService; | ||
import org.opensearch.knn.common.KNNConstants; | ||
import org.opensearch.knn.index.mapper.KNNVectorFieldMapper; | ||
|
||
import java.util.Map; | ||
import java.util.Optional; | ||
import java.util.function.BiFunction; | ||
import java.util.function.Supplier; | ||
|
||
/** | ||
* Base class for PerFieldKnnVectorsFormat, builds KnnVectorsFormat based on specific Lucene version. | ||
* <p> | ||
* For a field that is not a k-NN vector field (or when no mapper service is available) the format comes | ||
* from the default format supplier. For a k-NN vector field the format is built from the field's | ||
* max-connections and beam-width method parameters, each falling back to its default when absent. | ||
* All fields are set through the Lombok-generated all-args constructor. | ||
*/ | ||
@AllArgsConstructor | ||
@Log4j2 | ||
public abstract class BasePerFieldKnnVectorsFormat extends PerFieldKnnVectorsFormat { | ||
|
||
// Used to look up the field type; may be empty, in which case every field gets the default format. | ||
private final Optional<MapperService> mapperService; | ||
// Fallback used when the method parameters do not contain KNNConstants.METHOD_PARAMETER_M. | ||
private final int defaultMaxConnections; | ||
// Fallback used when the method parameters do not contain KNNConstants.METHOD_PARAMETER_EF_CONSTRUCTION. | ||
private final int defaultBeamWidth; | ||
// Produces the format for fields that are not k-NN vector fields. | ||
private final Supplier<KnnVectorsFormat> defaultFormatSupplier; | ||
// Produces the format from (maxConnections, beamWidth) for k-NN vector fields. | ||
private final BiFunction<Integer, Integer, KnnVectorsFormat> formatSupplier; | ||
|
||
// Returns the vectors format for the given field: the default format when the field is not a | ||
// k-NN vector field, otherwise a format built from the field's method parameters. | ||
// Throws IllegalStateException if the mapper service is absent on the k-NN path (see guard note below). | ||
@Override | ||
public KnnVectorsFormat getKnnVectorsFormatForField(final String field) { | ||
if (isKnnVectorFieldType(field) == false) { | ||
log.debug( | ||
"Initialize KNN vector format for field [{}] with default params [max_connections] = \"{}\" and [beam_width] = \"{}\"", | ||
field, | ||
defaultMaxConnections, | ||
defaultBeamWidth | ||
); | ||
return defaultFormatSupplier.get(); | ||
} | ||
// isKnnVectorFieldType returned true, which already requires mapperService to be present, | ||
// so the orElseThrow below acts only as a defensive guard. | ||
var type = (KNNVectorFieldMapper.KNNVectorFieldType) mapperService.orElseThrow( | ||
() -> new IllegalStateException( | ||
String.format("Cannot read field type for field [%s] because mapper service is not available", field) | ||
) | ||
).fieldType(field); | ||
// NOTE(review): assumes getKnnMethodContext() and getMethodComponent() are non-null for this field - confirm. | ||
var params = type.getKnnMethodContext().getMethodComponent().getParameters(); | ||
int maxConnections = getMaxConnections(params); | ||
int beamWidth = getBeamWidth(params); | ||
log.debug( | ||
"Initialize KNN vector format for field [{}] with params [max_connections] = \"{}\" and [beam_width] = \"{}\"", | ||
field, | ||
maxConnections, | ||
beamWidth | ||
); | ||
return formatSupplier.apply(maxConnections, beamWidth); | ||
} | ||
|
||
// True only when a mapper service is present and it reports the field as a KNNVectorFieldType. | ||
private boolean isKnnVectorFieldType(final String field) { | ||
return mapperService.isPresent() && mapperService.get().fieldType(field) instanceof KNNVectorFieldMapper.KNNVectorFieldType; | ||
} | ||
|
||
// Reads METHOD_PARAMETER_M from params, or returns defaultMaxConnections when params is null or lacks the key. | ||
// NOTE(review): the (int) cast assumes the stored value is an Integer - confirm against how parameters are parsed. | ||
private int getMaxConnections(final Map<String, Object> params) { | ||
if (params != null && params.containsKey(KNNConstants.METHOD_PARAMETER_M)) { | ||
return (int) params.get(KNNConstants.METHOD_PARAMETER_M); | ||
} | ||
return defaultMaxConnections; | ||
} | ||
|
||
// Reads METHOD_PARAMETER_EF_CONSTRUCTION from params, or returns defaultBeamWidth when params is null or lacks the key. | ||
// NOTE(review): the (int) cast assumes the stored value is an Integer - confirm against how parameters are parsed. | ||
private int getBeamWidth(final Map<String, Object> params) { | ||
if (params != null && params.containsKey(KNNConstants.METHOD_PARAMETER_EF_CONSTRUCTION)) { | ||
return (int) params.get(KNNConstants.METHOD_PARAMETER_EF_CONSTRUCTION); | ||
} | ||
return defaultBeamWidth; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What happens when we start upgrading from 2.4 to 2.5 or 3.x?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We'll need to disable this test for higher versions, similar to what we already do for some other ITs. As written, it works for cases where the previous version doesn't have filtering and the next version does.