-
Notifications
You must be signed in to change notification settings - Fork 126
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Handle multi-vector in exact search scenario (#1399)
Signed-off-by: Heemin Kim <[email protected]> (cherry picked from commit 8c98265)
- Loading branch information
1 parent
85fb70f
commit 70adec6
Showing
9 changed files
with
449 additions
and
45 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
74 changes: 74 additions & 0 deletions
74
src/main/java/org/opensearch/knn/index/query/filtered/FilteredIdsKNNIterator.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.knn.index.query.filtered; | ||
|
||
import org.apache.lucene.index.BinaryDocValues; | ||
import org.apache.lucene.search.DocIdSetIterator; | ||
import org.apache.lucene.util.BytesRef; | ||
import org.opensearch.knn.index.SpaceType; | ||
import org.opensearch.knn.index.codec.util.KNNVectorSerializer; | ||
import org.opensearch.knn.index.codec.util.KNNVectorSerializerFactory; | ||
|
||
import java.io.ByteArrayInputStream; | ||
import java.io.IOException; | ||
|
||
/** | ||
* Inspired by DiversifyingChildrenFloatKnnVectorQuery in lucene | ||
* https://github.com/apache/lucene/blob/7b8aece125aabff2823626d5b939abf4747f63a7/lucene/join/src/java/org/apache/lucene/search/join/DiversifyingChildrenFloatKnnVectorQuery.java#L162 | ||
* | ||
* The class is used in KNNWeight to score filtered KNN field by iterating filterIdsArray. | ||
*/ | ||
public class FilteredIdsKNNIterator { | ||
// Array of doc ids to iterate | ||
protected final int[] filterIdsArray; | ||
protected final float[] queryVector; | ||
protected final BinaryDocValues binaryDocValues; | ||
protected final SpaceType spaceType; | ||
protected float currentScore = Float.NEGATIVE_INFINITY; | ||
protected int currentPos = 0; | ||
|
||
public FilteredIdsKNNIterator( | ||
final int[] filterIdsArray, | ||
final float[] queryVector, | ||
final BinaryDocValues binaryDocValues, | ||
final SpaceType spaceType | ||
) { | ||
this.filterIdsArray = filterIdsArray; | ||
this.queryVector = queryVector; | ||
this.binaryDocValues = binaryDocValues; | ||
this.spaceType = spaceType; | ||
} | ||
|
||
/** | ||
* Advance to the next doc and update score value with score of the next doc. | ||
* DocIdSetIterator.NO_MORE_DOCS is returned when there is no more docs | ||
* | ||
* @return next doc id | ||
*/ | ||
public int nextDoc() throws IOException { | ||
if (currentPos >= filterIdsArray.length) { | ||
return DocIdSetIterator.NO_MORE_DOCS; | ||
} | ||
int docId = binaryDocValues.advance(filterIdsArray[currentPos]); | ||
currentScore = computeScore(); | ||
currentPos++; | ||
return docId; | ||
} | ||
|
||
public float score() { | ||
return currentScore; | ||
} | ||
|
||
protected float computeScore() throws IOException { | ||
final BytesRef value = binaryDocValues.binaryValue(); | ||
final ByteArrayInputStream byteStream = new ByteArrayInputStream(value.bytes, value.offset, value.length); | ||
final KNNVectorSerializer vectorSerializer = KNNVectorSerializerFactory.getSerializerByStreamContent(byteStream); | ||
final float[] vector = vectorSerializer.byteToFloatArray(byteStream); | ||
// Calculates a similarity score between the two vectors with a specified function. Higher similarity | ||
// scores correspond to closer vectors. | ||
return spaceType.getVectorSimilarityFunction().compare(queryVector, vector); | ||
} | ||
} |
59 changes: 59 additions & 0 deletions
59
src/main/java/org/opensearch/knn/index/query/filtered/NestedFilteredIdsKNNIterator.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.knn.index.query.filtered; | ||
|
||
import org.apache.lucene.index.BinaryDocValues; | ||
import org.apache.lucene.search.DocIdSetIterator; | ||
import org.apache.lucene.util.BitSet; | ||
import org.opensearch.knn.index.SpaceType; | ||
|
||
import java.io.IOException; | ||
|
||
/** | ||
* This iterator iterates filterIdsArray to score. However, it dedupe docs per each parent doc | ||
* of which ID is set in parentBitSet and only return best child doc with the highest score. | ||
*/ | ||
public class NestedFilteredIdsKNNIterator extends FilteredIdsKNNIterator { | ||
private final BitSet parentBitSet; | ||
|
||
public NestedFilteredIdsKNNIterator( | ||
final int[] filterIdsArray, | ||
final float[] queryVector, | ||
final BinaryDocValues values, | ||
final SpaceType spaceType, | ||
final BitSet parentBitSet | ||
) { | ||
super(filterIdsArray, queryVector, values, spaceType); | ||
this.parentBitSet = parentBitSet; | ||
} | ||
|
||
/** | ||
* Advance to the next best child doc per parent and update score with the best score among child docs from the parent. | ||
* DocIdSetIterator.NO_MORE_DOCS is returned when there is no more docs | ||
* | ||
* @return next best child doc id | ||
*/ | ||
@Override | ||
public int nextDoc() throws IOException { | ||
if (currentPos >= filterIdsArray.length) { | ||
return DocIdSetIterator.NO_MORE_DOCS; | ||
} | ||
currentScore = Float.NEGATIVE_INFINITY; | ||
int currentParent = parentBitSet.nextSetBit(filterIdsArray[currentPos]); | ||
int bestChild = -1; | ||
while (currentPos < filterIdsArray.length && filterIdsArray[currentPos] < currentParent) { | ||
binaryDocValues.advance(filterIdsArray[currentPos]); | ||
float score = computeScore(); | ||
if (score > currentScore) { | ||
bestChild = filterIdsArray[currentPos]; | ||
currentScore = score; | ||
} | ||
currentPos++; | ||
} | ||
|
||
return bestChild; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
/* | ||
* Copyright OpenSearch Contributors | ||
* SPDX-License-Identifier: Apache-2.0 | ||
*/ | ||
|
||
package org.opensearch.knn.common; | ||
|
||
/**
 * Holder of shared string constants. The class only exposes static fields, so it is final and
 * cannot be instantiated.
 */
public final class Constants {
    /** The literal {@code "filter"}; presumably a query field name - confirm against usages. */
    public static final String FIELD_FILTER = "filter";
    /** The literal {@code "term"}; presumably a query field name - confirm against usages. */
    public static final String FIELD_TERM = "term";

    private Constants() {
        // Constants holder; not meant to be instantiated
    }
}
Oops, something went wrong.