Skip to content

Commit

Permalink
Implementation for match_only_text field
Browse files Browse the repository at this point in the history
Signed-off-by: Rishabh Maurya <[email protected]>
  • Loading branch information
rishabhmaurya committed Oct 31, 2023
1 parent cb39d00 commit 0f5485c
Show file tree
Hide file tree
Showing 7 changed files with 341 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -352,23 +352,34 @@ public Query existsQuery(QueryShardContext context) {
}

/**
 * Builds a phrase query from an analyzed token stream. This base implementation rejects the request; field types
 * that support phrase queries override it.
 *
 * @param stream the analyzed query tokens
 * @param slop the phrase slop
 * @param enablePositionIncrements whether position increments reported by the stream are honoured
 * @return never returns normally from this base implementation
 * @throws IllegalArgumentException always, because this field type does not support phrase queries
 * @throws IOException declared for overriding implementations that consume the stream
 */
public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException {
    throw new IllegalArgumentException(
        "Can only use phrase queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]"
    );
}

/**
 * Context-aware variant of {@link #phraseQuery(TokenStream, int, boolean)}. By default the context is ignored and the
 * call is forwarded to the three-argument overload, so field types that only override that overload keep working
 * when a caller supplies a context, and no {@code null} context is ever handed to an overriding implementation.
 *
 * @param stream the analyzed query tokens
 * @param slop the phrase slop
 * @param enablePositionIncrements whether position increments reported by the stream are honoured
 * @param context the shard context of the running query; unused by this default implementation
 * @return the query produced by the three-argument overload
 * @throws IOException if the underlying implementation fails while consuming the stream
 */
public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) throws IOException {
    return phraseQuery(stream, slop, enablePositionIncrements);
}

/**
 * Builds a multi-phrase query from an analyzed token stream. This base implementation rejects the request; field
 * types that support phrase queries override it.
 *
 * @param stream the analyzed query tokens
 * @param slop the phrase slop
 * @param enablePositionIncrements whether position increments reported by the stream are honoured
 * @return never returns normally from this base implementation
 * @throws IllegalArgumentException always, because this field type does not support phrase queries
 * @throws IOException declared for overriding implementations that consume the stream
 */
public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException {
    throw new IllegalArgumentException(
        "Can only use phrase queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]"
    );
}

/**
 * Context-aware variant of {@link #multiPhraseQuery(TokenStream, int, boolean)}. By default the context is ignored
 * and the call is forwarded to the three-argument overload, so field types that only override that overload keep
 * working when a caller supplies a context.
 *
 * @param stream the analyzed query tokens
 * @param slop the phrase slop
 * @param enablePositionIncrements whether position increments reported by the stream are honoured
 * @param context the shard context of the running query; unused by this default implementation
 * @return the query produced by the three-argument overload
 * @throws IOException if the underlying implementation fails while consuming the stream
 */
public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) throws IOException {
    return multiPhraseQuery(stream, slop, enablePositionIncrements);
}

/**
 * Builds a phrase-prefix query from an analyzed token stream. This base implementation rejects the request; field
 * types that support phrase prefix queries override it.
 *
 * @param stream the analyzed query tokens
 * @param slop the phrase slop
 * @param maxExpansions the maximum number of terms the trailing prefix may expand to
 * @return never returns normally from this base implementation
 * @throws IllegalArgumentException always, because this field type does not support phrase prefix queries
 * @throws IOException declared for overriding implementations that consume the stream
 */
public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) throws IOException {
    throw new IllegalArgumentException(
        "Can only use phrase prefix queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]"
    );
}

/**
 * Context-aware variant of {@link #phrasePrefixQuery(TokenStream, int, int)}. By default the context is ignored and
 * the call is forwarded to the three-argument overload, so field types that only override that overload keep
 * working when a caller supplies a context.
 *
 * @param stream the analyzed query tokens
 * @param slop the phrase slop
 * @param maxExpansions the maximum number of terms the trailing prefix may expand to
 * @param context the shard context of the running query; unused by this default implementation
 * @return the query produced by the three-argument overload
 * @throws IOException if the underlying implementation fails while consuming the stream
 */
public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, QueryShardContext context) throws IOException {
    return phrasePrefixQuery(stream, slop, maxExpansions);
}

public SpanQuery spanPrefixQuery(String value, SpanMultiTermQueryWrapper.SpanRewriteMethod method, QueryShardContext context) {
throw new IllegalArgumentException(
"Can only use span prefix queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.mapper;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.opensearch.Version;
import org.opensearch.common.lucene.search.MultiPhrasePrefixQuery;
import org.opensearch.index.analysis.IndexAnalyzers;
import org.opensearch.index.query.QueryShardContext;
import org.opensearch.index.query.SourceFieldMatchQuery;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Field mapper for the {@code match_only_text} field type: a text field whose Lucene field type is fixed to
 * {@link IndexOptions#DOCS} with norms omitted, so no positional data is indexed. Phrase-style queries are answered by
 * matching on the individual terms first and then verifying every candidate with a {@link SourceFieldMatchQuery}.
 */
public class MatchOnlyTextFieldMapper extends TextFieldMapper {

    /** Frozen Lucene field type shared by every {@code match_only_text} field. */
    public static final FieldType FIELD_TYPE = new FieldType();
    public static final String CONTENT_TYPE = "match_only_text";

    static {
        FIELD_TYPE.setTokenized(true);
        FIELD_TYPE.setStored(false);
        FIELD_TYPE.setStoreTermVectors(false);
        FIELD_TYPE.setOmitNorms(true);
        FIELD_TYPE.setIndexOptions(IndexOptions.DOCS);
        FIELD_TYPE.freeze();
    }

    public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n, c.indexVersionCreated(), c.getIndexAnalyzers()));

    protected MatchOnlyTextFieldMapper(String simpleName, FieldType fieldType, MatchOnlyTextFieldType mappedFieldType,
                                       TextFieldMapper.PrefixFieldMapper prefixFieldMapper,
                                       TextFieldMapper.PhraseFieldMapper phraseFieldMapper,
                                       MultiFields multiFields, CopyTo copyTo, Builder builder) {
        super(simpleName, fieldType, mappedFieldType, prefixFieldMapper, phraseFieldMapper, multiFields, copyTo, builder);
    }

    @Override
    protected String contentType() {
        return CONTENT_TYPE;
    }

    /**
     * Builder for {@link MatchOnlyTextFieldMapper}; it always builds the mapper with {@link #FIELD_TYPE}.
     */
    public static class Builder extends TextFieldMapper.Builder {

        public Builder(String name, IndexAnalyzers indexAnalyzers) {
            super(name, indexAnalyzers);
        }

        public Builder(String name, Version indexCreatedVersion, IndexAnalyzers indexAnalyzers) {
            super(name, indexCreatedVersion, indexAnalyzers);
        }

        @Override
        public MatchOnlyTextFieldMapper build(BuilderContext context) {
            FieldType fieldType = FIELD_TYPE;
            MatchOnlyTextFieldType tft = new MatchOnlyTextFieldType(buildFieldType(fieldType, context));
            return new MatchOnlyTextFieldMapper(
                name,
                fieldType,
                tft,
                buildPrefixMapper(context, fieldType, tft),
                buildPhraseMapper(fieldType, tft),
                multiFieldsBuilder.build(this, context),
                copyTo.build(),
                this
            );
        }
    }

    /**
     * Mapped field type for {@code match_only_text}. Every positional query is wrapped in a
     * {@link SourceFieldMatchQuery}: a term-only boolean query selects the candidates and the positional query built
     * by the parent text field type is passed along as the verifying filter.
     */
    public static final class MatchOnlyTextFieldType extends TextFieldMapper.TextFieldType {

        public MatchOnlyTextFieldType(TextFieldMapper.TextFieldType tft) {
            super(tft.name(), tft.isSearchable(), tft.isStored(), tft.getTextSearchInfo(), tft.meta());
        }

        @Override
        public String typeName() {
            return CONTENT_TYPE;
        }

        /**
         * Builds a phrase query: candidates must contain every phrase term, and the phrase itself is the filter.
         *
         * @throws IllegalArgumentException if {@code context} is {@code null}
         */
        @Override
        public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncrements, QueryShardContext context) throws IOException {
            PhraseQuery phraseQuery = (PhraseQuery) super.phraseQuery(stream, slop, enablePosIncrements);
            BooleanQuery.Builder builder = new BooleanQuery.Builder();
            for (Term term : phraseQuery.getTerms()) {
                builder.add(new TermQuery(term), BooleanClause.Occur.FILTER);
            }
            return sourceFieldMatchQuery(builder.build(), phraseQuery, context);
        }

        /**
         * Builds a multi-phrase query: candidates must contain at least one term of every position, and the
         * multi-phrase query itself is the filter.
         *
         * @throws IllegalArgumentException if {@code context} is {@code null}
         */
        @Override
        public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements, QueryShardContext context) throws IOException {
            MultiPhraseQuery multiPhraseQuery = (MultiPhraseQuery) super.multiPhraseQuery(stream, slop, enablePositionIncrements);
            BooleanQuery.Builder builder = new BooleanQuery.Builder();
            for (Term[] terms : multiPhraseQuery.getTermArrays()) {
                BooleanQuery.Builder disjunctions = new BooleanQuery.Builder();
                for (Term term : terms) {
                    disjunctions.add(new TermQuery(term), BooleanClause.Occur.SHOULD);
                }
                builder.add(disjunctions.build(), BooleanClause.Occur.FILTER);
            }
            return sourceFieldMatchQuery(builder.build(), multiPhraseQuery, context);
        }

        /**
         * Builds a phrase-prefix query: candidates must contain at least one term of every position, where terms of
         * the last position are matched as prefixes; the parent's phrase-prefix query is the filter.
         *
         * @throws IllegalArgumentException if {@code context} is {@code null}
         */
        @Override
        public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, QueryShardContext context) throws IOException {
            Query phrasePrefixQuery = super.phrasePrefixQuery(stream, slop, maxExpansions);
            // NOTE(review): the stream was already consumed by the parent call above; re-reading it relies on the
            // caller handing in a stream that can be replayed after reset() - confirm against callers.
            List<List<Term>> termArray = getTermsFromTokenStream(stream);
            BooleanQuery.Builder builder = new BooleanQuery.Builder();
            int lastPosition = termArray.size() - 1;
            for (int i = 0; i <= lastPosition; i++) {
                boolean isPrefixPosition = i == lastPosition;
                BooleanQuery.Builder disjunctions = new BooleanQuery.Builder();
                for (Term term : termArray.get(i)) {
                    Query clause;
                    if (isPrefixPosition) {
                        // only the final position of a phrase-prefix query is treated as a prefix
                        MultiPhrasePrefixQuery mqb = new MultiPhrasePrefixQuery(name());
                        mqb.add(term);
                        clause = mqb;
                    } else {
                        clause = new TermQuery(term);
                    }
                    disjunctions.add(clause, BooleanClause.Occur.SHOULD);
                }
                builder.add(disjunctions.build(), BooleanClause.Occur.FILTER);
            }
            return sourceFieldMatchQuery(builder.build(), phrasePrefixQuery, context);
        }

        /**
         * Wraps a term-only candidate query and the positional query verifying it into a {@link SourceFieldMatchQuery}.
         *
         * @param approximation the query selecting candidate documents
         * @param positionalQuery the query each candidate is verified against
         * @param context the shard context providing the value fetcher and search lookup; must not be {@code null}
         * @throws IllegalArgumentException if {@code context} is {@code null}
         */
        private Query sourceFieldMatchQuery(Query approximation, Query positionalQuery, QueryShardContext context) {
            if (context == null) {
                throw new IllegalArgumentException(
                    "A query shard context is required to run positional queries on [" + name() + "] which is of type [" + typeName() + "]"
                );
            }
            return new SourceFieldMatchQuery(approximation, positionalQuery, this,
                (SourceValueFetcher) this.valueFetcher(context, context.lookup(), null), context.lookup());
        }

        /**
         * Reads the stream from its start and groups the emitted terms by position: a token with a position increment
         * of zero joins the terms of the previous token, any other increment starts a new group.
         *
         * @return one list of terms per position, in stream order; empty when the stream emits no tokens
         */
        private List<List<Term>> getTermsFromTokenStream(TokenStream stream) throws IOException {
            final List<List<Term>> termArray = new ArrayList<>();
            TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
            PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
            List<Term> currentTerms = new ArrayList<>();
            stream.reset();
            while (stream.incrementToken()) {
                if (posIncrAtt.getPositionIncrement() != 0) {
                    if (currentTerms.isEmpty() == false) {
                        termArray.add(List.copyOf(currentTerms));
                    }
                    currentTerms.clear();
                }
                currentTerms.add(new Term(name(), termAtt.getBytesRef()));
            }
            // flush the last group; skipped for an empty stream so that no term-less position is reported
            if (currentTerms.isEmpty() == false) {
                termArray.add(List.copyOf(currentTerms));
            }
            return termArray;
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,7 @@ protected List<Parameter<?>> getParameters() {
);
}

private TextFieldType buildFieldType(FieldType fieldType, BuilderContext context) {
protected TextFieldType buildFieldType(FieldType fieldType, BuilderContext context) {
NamedAnalyzer indexAnalyzer = analyzers.getIndexAnalyzer();
NamedAnalyzer searchAnalyzer = analyzers.getSearchAnalyzer();
NamedAnalyzer searchQuoteAnalyzer = analyzers.getSearchQuoteAnalyzer();
Expand All @@ -420,7 +420,7 @@ private TextFieldType buildFieldType(FieldType fieldType, BuilderContext context
return ft;
}

private PrefixFieldMapper buildPrefixMapper(BuilderContext context, FieldType fieldType, TextFieldType tft) {
protected PrefixFieldMapper buildPrefixMapper(BuilderContext context, FieldType fieldType, TextFieldType tft) {
if (indexPrefixes.get() == null) {
return null;
}
Expand Down Expand Up @@ -454,7 +454,7 @@ private PrefixFieldMapper buildPrefixMapper(BuilderContext context, FieldType fi
return new PrefixFieldMapper(pft, prefixFieldType);
}

private PhraseFieldMapper buildPhraseMapper(FieldType fieldType, TextFieldType parent) {
protected PhraseFieldMapper buildPhraseMapper(FieldType fieldType, TextFieldType parent) {
if (indexPhrases.get() == false) {
return null;
}
Expand Down Expand Up @@ -683,7 +683,7 @@ public Query existsQuery(QueryShardContext context) {
*
* @opensearch.internal
*/
private static final class PhraseFieldMapper extends FieldMapper {
protected static final class PhraseFieldMapper extends FieldMapper {

PhraseFieldMapper(FieldType fieldType, PhraseFieldType mappedFieldType) {
super(mappedFieldType.name(), fieldType, mappedFieldType, MultiFields.empty(), CopyTo.empty());
Expand All @@ -710,7 +710,7 @@ protected String contentType() {
*
* @opensearch.internal
*/
private static final class PrefixFieldMapper extends FieldMapper {
protected static final class PrefixFieldMapper extends FieldMapper {

protected PrefixFieldMapper(FieldType fieldType, PrefixFieldType mappedFieldType) {
super(mappedFieldType.name(), fieldType, mappedFieldType, MultiFields.empty(), CopyTo.empty());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.query;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
import org.opensearch.index.mapper.MappedFieldType;
import org.opensearch.index.mapper.SourceValueFetcher;
import org.opensearch.search.lookup.LeafSearchLookup;
import org.opensearch.search.lookup.SearchLookup;

import java.io.IOException;
import java.util.List;
import java.util.Objects;

/**
 * A query that takes its candidate documents from a delegate query and confirms each candidate by running a filter
 * query against an in-memory index built from the field values fetched from the document's source.
 * Useful for field types that do not index positional data and whose values are neither stored nor computed dynamically.
 */
public class SourceFieldMatchQuery extends Query {
    private final Query delegateQuery;
    private final Query filter;
    private final SearchLookup lookup;
    private final MappedFieldType fieldType;
    private final SourceValueFetcher valueFetcher;

    /**
     * Constructs a SourceFieldMatchQuery.
     *
     * @param delegateQuery The parent query used to find candidate matches.
     * @param filter        The query used to filter further by running against the field value computed from the _source field.
     * @param fieldType     The mapped field type.
     * @param valueFetcher  The source value fetcher.
     * @param lookup        The search lookup.
     */
    public SourceFieldMatchQuery(Query delegateQuery, Query filter, MappedFieldType fieldType,
                                 SourceValueFetcher valueFetcher, SearchLookup lookup) {
        this.delegateQuery = delegateQuery;
        this.filter = filter;
        this.fieldType = fieldType;
        this.valueFetcher = valueFetcher;
        this.lookup = lookup;
    }

    @Override
    public void visit(QueryVisitor visitor) {
        delegateQuery.visit(visitor);
    }

    /**
     * Rewrites the delegate while keeping this wrapper in place. Returning the rewritten delegate on its own would
     * discard the source-based verification and let every candidate through.
     */
    @Override
    public Query rewrite(IndexSearcher searcher) throws IOException {
        Query rewritten = delegateQuery.rewrite(searcher);
        if (rewritten == delegateQuery) {
            return this;
        }
        return new SourceFieldMatchQuery(rewritten, filter, fieldType, valueFetcher, lookup);
    }

    @Override
    public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException {

        Weight weight = delegateQuery.createWeight(searcher, scoreMode, boost);

        return new ConstantScoreWeight(this, boost) {

            @Override
            public Scorer scorer(LeafReaderContext context) throws IOException {

                Scorer scorer = weight.scorer(context);
                if (scorer == null) {
                    // the delegate has no scorer for this segment, so there is nothing to verify
                    return null;
                }
                DocIdSetIterator approximation = scorer.iterator();
                LeafSearchLookup leafSearchLookup = lookup.getLeafSearchLookup(context);
                TwoPhaseIterator twoPhase = new TwoPhaseIterator(approximation) {

                    @Override
                    public boolean matches() {
                        // re-index the candidate's source values in memory and check them against the filter
                        leafSearchLookup.setDocument(approximation.docID());
                        List<Object> values = valueFetcher.fetchValues(leafSearchLookup.source());
                        MemoryIndex memoryIndex = new MemoryIndex();
                        for (Object value : values) {
                            memoryIndex.addField(fieldType.name(), (String) value, fieldType.indexAnalyzer());
                        }
                        // the delegate already matched this document; only the filter adds information here
                        float score = memoryIndex.search(filter);
                        return score > 0.0f;
                    }

                    @Override
                    public float matchCost() {
                        // arbitrary cost
                        return 1000f;
                    }
                };
                return new ConstantScoreScorer(this, score(), scoreMode, twoPhase);
            }

            @Override
            public boolean isCacheable(LeafReaderContext ctx) {
                // It is fine to cache if delegate query weight is cacheable since additional logic here
                // is just a filter on top of delegate query matches
                return weight.isCacheable(ctx);
            }
        };
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (sameClassAs(o) == false) {
            return false;
        }
        SourceFieldMatchQuery other = (SourceFieldMatchQuery) o;
        // every field is compared by value, in line with hashCode()
        return Objects.equals(this.delegateQuery, other.delegateQuery)
            && Objects.equals(this.filter, other.filter)
            && Objects.equals(this.lookup, other.lookup)
            && Objects.equals(this.fieldType, other.fieldType)
            && Objects.equals(this.valueFetcher, other.valueFetcher);
    }

    @Override
    public int hashCode() {
        return Objects.hash(classHash(), delegateQuery, filter, lookup, fieldType, valueFetcher);
    }

    @Override
    public String toString(String f) {
        return "SourceFieldMatchQuery (delegate query: [ " + delegateQuery.toString(f) +
            " ], filter query: [ " + filter.toString(f) + "])";
    }
}
Loading

0 comments on commit 0f5485c

Please sign in to comment.