-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implementation for match_only_text field
Signed-off-by: Rishabh Maurya <[email protected]>
- Loading branch information
1 parent
cb39d00
commit 0f5485c
Showing
7 changed files
with
341 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
167 changes: 167 additions & 0 deletions
167
server/src/main/java/org/opensearch/index/mapper/MatchOnlyTextFieldMapper.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,167 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* | ||
* The OpenSearch Contributors require contributions made to | ||
* this file be licensed under the Apache-2.0 license or a | ||
* compatible open source license. | ||
*/ | ||
|
||
package org.opensearch.index.mapper; | ||
|
||
import org.apache.lucene.analysis.TokenStream; | ||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; | ||
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; | ||
import org.apache.lucene.document.FieldType; | ||
import org.apache.lucene.index.IndexOptions; | ||
import org.apache.lucene.index.Term; | ||
import org.apache.lucene.search.BooleanClause; | ||
import org.apache.lucene.search.BooleanQuery; | ||
import org.apache.lucene.search.MultiPhraseQuery; | ||
import org.apache.lucene.search.PhraseQuery; | ||
import org.apache.lucene.search.Query; | ||
import org.apache.lucene.search.TermQuery; | ||
import org.opensearch.Version; | ||
import org.opensearch.common.lucene.search.MultiPhrasePrefixQuery; | ||
import org.opensearch.index.analysis.IndexAnalyzers; | ||
import org.opensearch.index.query.QueryShardContext; | ||
import org.opensearch.index.query.SourceFieldMatchQuery; | ||
|
||
import java.io.IOException; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
|
||
public class MatchOnlyTextFieldMapper extends TextFieldMapper { | ||
|
||
public static final FieldType FIELD_TYPE = new FieldType(); | ||
public static final String CONTENT_TYPE = "match_only_text"; | ||
|
||
@Override | ||
protected String contentType() { | ||
return CONTENT_TYPE; | ||
} | ||
|
||
static { | ||
FIELD_TYPE.setTokenized(true); | ||
FIELD_TYPE.setStored(false); | ||
FIELD_TYPE.setStoreTermVectors(false); | ||
FIELD_TYPE.setOmitNorms(true); | ||
FIELD_TYPE.setIndexOptions(IndexOptions.DOCS); | ||
FIELD_TYPE.freeze(); | ||
} | ||
|
||
public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers())); | ||
|
||
protected MatchOnlyTextFieldMapper(String simpleName, FieldType fieldType, MatchOnlyTextFieldType mappedFieldType, | ||
TextFieldMapper.PrefixFieldMapper prefixFieldMapper, | ||
TextFieldMapper.PhraseFieldMapper phraseFieldMapper, | ||
MultiFields multiFields, CopyTo copyTo, Builder builder) { | ||
|
||
super(simpleName, fieldType, mappedFieldType, prefixFieldMapper, phraseFieldMapper, multiFields, copyTo, builder); | ||
} | ||
|
||
public static class Builder extends TextFieldMapper.Builder { | ||
|
||
public Builder(String name, IndexAnalyzers indexAnalyzers) { | ||
super(name, indexAnalyzers); | ||
} | ||
|
||
public Builder(String name, Version indexCreatedVersion, IndexAnalyzers indexAnalyzers) { | ||
super(name, indexCreatedVersion, indexAnalyzers); | ||
} | ||
|
||
@Override | ||
public MatchOnlyTextFieldMapper build(BuilderContext context) { | ||
FieldType fieldType = FIELD_TYPE; | ||
MatchOnlyTextFieldType tft = new MatchOnlyTextFieldType(buildFieldType(fieldType, context)); | ||
return new MatchOnlyTextFieldMapper( | ||
name, | ||
fieldType, | ||
tft, | ||
buildPrefixMapper(context, fieldType, tft), | ||
buildPhraseMapper(fieldType, tft), | ||
multiFieldsBuilder.build(this, context), | ||
copyTo.build(), | ||
this | ||
); | ||
} | ||
} | ||
|
||
public static final class MatchOnlyTextFieldType extends TextFieldMapper.TextFieldType { | ||
|
||
@Override | ||
public String typeName() { | ||
return CONTENT_TYPE; | ||
} | ||
|
||
public MatchOnlyTextFieldType(TextFieldMapper.TextFieldType tft) { | ||
super(tft.name(), tft.isSearchable(), tft.isStored(), tft.getTextSearchInfo(), tft.meta()); | ||
} | ||
|
||
@Override | ||
public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncrements, QueryShardContext context) throws IOException { | ||
PhraseQuery phraseQuery = (PhraseQuery) super.phraseQuery(stream, slop, enablePosIncrements); | ||
BooleanQuery.Builder builder = new BooleanQuery.Builder(); | ||
for (Term term: phraseQuery.getTerms()) { | ||
builder.add(new TermQuery(term), BooleanClause.Occur.FILTER); | ||
} | ||
return new SourceFieldMatchQuery(builder.build(), phraseQuery, this, | ||
(SourceValueFetcher) this.valueFetcher(context, context.lookup(), null), context.lookup()); | ||
} | ||
|
||
@Override | ||
public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) throws IOException { | ||
MultiPhraseQuery multiPhraseQuery = (MultiPhraseQuery) super.multiPhraseQuery(stream, slop, enablePositionIncrements); | ||
BooleanQuery.Builder builder = new BooleanQuery.Builder(); | ||
for (Term[] terms : multiPhraseQuery.getTermArrays()) { | ||
BooleanQuery.Builder disjunctions = new BooleanQuery.Builder(); | ||
for (Term term: terms) { | ||
disjunctions.add(new TermQuery(term), BooleanClause.Occur.SHOULD); | ||
} | ||
builder.add(disjunctions.build(), BooleanClause.Occur.FILTER); | ||
} | ||
return new SourceFieldMatchQuery(builder.build(), multiPhraseQuery, this, | ||
(SourceValueFetcher) this.valueFetcher(context, context.lookup(), null), context.lookup()); | ||
} | ||
|
||
@Override | ||
public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, QueryShardContext context) throws IOException { | ||
Query phrasePrefixQuery = super.phrasePrefixQuery(stream, slop, maxExpansions); | ||
List<List<Term>> termArray = getTermsFromTokenStream(stream); | ||
BooleanQuery.Builder builder = new BooleanQuery.Builder(); | ||
for (int i = 0; i < termArray.size(); i++) { | ||
BooleanQuery.Builder disjunctions = new BooleanQuery.Builder(); | ||
for (Term term: termArray.get(i)) { | ||
if (i == termArray.size() - 1) { | ||
MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery(name()); | ||
mqb.add(term); | ||
disjunctions.add(mqb, BooleanClause.Occur.SHOULD); | ||
} else { | ||
disjunctions.add(new TermQuery(term), BooleanClause.Occur.SHOULD); | ||
} | ||
} | ||
builder.add(disjunctions.build(), BooleanClause.Occur.FILTER); | ||
} | ||
return new SourceFieldMatchQuery(builder.build(), phrasePrefixQuery, this, | ||
(SourceValueFetcher) this.valueFetcher(context, context.lookup(), null), context.lookup()); | ||
} | ||
|
||
private List<List<Term>> getTermsFromTokenStream(TokenStream stream) throws IOException { | ||
final List<List<Term>> termArray = new ArrayList<>(); | ||
TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); | ||
PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class); | ||
List<Term> currentTerms = new ArrayList<>(); | ||
stream.reset(); | ||
while (stream.incrementToken()) { | ||
if (posIncrAtt.getPositionIncrement() != 0) { | ||
if (currentTerms.isEmpty() == false) { | ||
termArray.add(List.copyOf(currentTerms)); | ||
} | ||
currentTerms.clear(); | ||
} | ||
currentTerms.add(new Term(name(), termAtt.getBytesRef())); | ||
} | ||
termArray.add(List.copyOf(currentTerms)); | ||
return termArray; | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
142 changes: 142 additions & 0 deletions
142
server/src/main/java/org/opensearch/index/query/SourceFieldMatchQuery.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
/* | ||
* SPDX-License-Identifier: Apache-2.0 | ||
* | ||
* The OpenSearch Contributors require contributions made to | ||
* this file be licensed under the Apache-2.0 license or a | ||
* compatible open source license. | ||
*/ | ||
|
||
package org.opensearch.index.query; | ||
|
||
import org.apache.lucene.index.LeafReaderContext; | ||
import org.apache.lucene.index.memory.MemoryIndex; | ||
import org.apache.lucene.search.ConstantScoreScorer; | ||
import org.apache.lucene.search.ConstantScoreWeight; | ||
import org.apache.lucene.search.DocIdSetIterator; | ||
import org.apache.lucene.search.IndexSearcher; | ||
import org.apache.lucene.search.Query; | ||
import org.apache.lucene.search.QueryVisitor; | ||
import org.apache.lucene.search.ScoreMode; | ||
import org.apache.lucene.search.Scorer; | ||
import org.apache.lucene.search.TwoPhaseIterator; | ||
import org.apache.lucene.search.Weight; | ||
import org.opensearch.index.mapper.MappedFieldType; | ||
import org.opensearch.index.mapper.SourceValueFetcher; | ||
import org.opensearch.search.lookup.LeafSearchLookup; | ||
import org.opensearch.search.lookup.SearchLookup; | ||
|
||
import java.io.IOException; | ||
import java.util.List; | ||
import java.util.Objects; | ||
|
||
/** | ||
* A query that matches against each document from the parent query by filtering using the source field values. | ||
* Useful to query against field type which doesn't store positional data and field is not stored/computed dynamically. | ||
*/ | ||
public class SourceFieldMatchQuery extends Query { | ||
final private Query delegateQuery; | ||
final private Query filter; | ||
final private SearchLookup lookup; | ||
final private MappedFieldType fieldType; | ||
final private SourceValueFetcher valueFetcher; | ||
|
||
/** | ||
* Constructs a SourceFieldMatchQuery. | ||
* | ||
* @param delegateQuery The parent query to use to find matches. | ||
* @param filter The query used to filter further by running against field value computed using _source field. | ||
* @param fieldType The mapped field type. | ||
* @param valueFetcher The source value fetcher. | ||
* @param lookup The search lookup. | ||
*/ | ||
public SourceFieldMatchQuery(Query delegateQuery, Query filter, MappedFieldType fieldType, | ||
SourceValueFetcher valueFetcher, SearchLookup lookup) { | ||
this.delegateQuery = delegateQuery; | ||
this.filter = filter; | ||
this.fieldType = fieldType; | ||
this.valueFetcher = valueFetcher; | ||
this.lookup = lookup; | ||
} | ||
|
||
@Override | ||
public void visit(QueryVisitor visitor) { | ||
delegateQuery.visit(visitor); | ||
} | ||
|
||
@Override | ||
public Query rewrite(IndexSearcher searcher) throws IOException { | ||
return delegateQuery.rewrite(searcher); | ||
} | ||
|
||
@Override | ||
public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { | ||
|
||
Weight weight = delegateQuery.createWeight(searcher, scoreMode, boost); | ||
|
||
return new ConstantScoreWeight(this, boost) { | ||
|
||
@Override | ||
public Scorer scorer(LeafReaderContext context) throws IOException { | ||
|
||
Scorer scorer = weight.scorer(context); | ||
DocIdSetIterator approximation = scorer.iterator(); | ||
LeafSearchLookup leafSearchLookup = lookup.getLeafSearchLookup(context); | ||
TwoPhaseIterator twoPhase = new TwoPhaseIterator(approximation) { | ||
|
||
@Override | ||
public boolean matches() { | ||
leafSearchLookup.setDocument(approximation.docID()); | ||
List<Object> values = valueFetcher.fetchValues(leafSearchLookup.source()); | ||
MemoryIndex memoryIndex = new MemoryIndex(); | ||
for (Object value : values) { | ||
memoryIndex.addField(fieldType.name(), (String) value, fieldType.indexAnalyzer()); | ||
} | ||
float score = memoryIndex.search(delegateQuery); | ||
return score > 0.0f; | ||
} | ||
|
||
@Override | ||
public float matchCost() { | ||
// arbitrary cost | ||
return 1000f; | ||
} | ||
}; | ||
return new ConstantScoreScorer(this, score(), scoreMode, twoPhase); | ||
} | ||
|
||
@Override | ||
public boolean isCacheable(LeafReaderContext ctx) { | ||
// It is fine to cache if delegate query weight is cacheable since additional logic here | ||
// is just a filter on top of delegate query matches | ||
return weight.isCacheable(ctx); | ||
} | ||
}; | ||
} | ||
|
||
@Override | ||
public boolean equals(Object o) { | ||
if (this == o) { | ||
return true; | ||
} | ||
if (sameClassAs(o) == false) { | ||
return false; | ||
} | ||
SourceFieldMatchQuery other = (SourceFieldMatchQuery) o; | ||
return Objects.equals(this.delegateQuery, other.delegateQuery) | ||
&& this.filter == other.filter | ||
&& Objects.equals(this.lookup, other.lookup) | ||
&& Objects.equals(this.fieldType, other.fieldType) | ||
&& Objects.equals(this.valueFetcher, other.valueFetcher); | ||
} | ||
|
||
@Override | ||
public int hashCode() { | ||
return Objects.hash(classHash(), delegateQuery, filter, lookup, fieldType, valueFetcher); | ||
} | ||
|
||
@Override | ||
public String toString(String f) { | ||
return "SourceFieldMatchQuery (delegate query: [ " + delegateQuery.toString(f) + | ||
" ], filter query: [ " + filter.toString(f) + "])"; | ||
} | ||
} |
Oops, something went wrong.