Skip to content

Commit

Permalink
Add positive_score_impact to rank_features type (#70277)
Browse files Browse the repository at this point in the history
rank_features field type misses positive_score_impact parameter
that rank_feature type has. This adds this parameter.

Backport for #69994
Closes #68619
  • Loading branch information
mayya-sharipova authored Mar 11, 2021
1 parent db38955 commit f7cb422
Show file tree
Hide file tree
Showing 5 changed files with 131 additions and 12 deletions.
37 changes: 34 additions & 3 deletions docs/reference/mapping/types/rank-features.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,24 @@ PUT my-index-000001
"properties": {
"topics": {
"type": "rank_features" <1>
},
"negative_reviews" : {
"type": "rank_features",
"positive_score_impact": false <2>
}
}
}
}
PUT my-index-000001/_doc/1
{
"topics": { <2>
"topics": { <3>
"politics": 20,
"economics": 50.8
},
"negative_reviews": {
"1star": 10,
"2star": 100
}
}
Expand All @@ -38,21 +46,38 @@ PUT my-index-000001/_doc/2
"topics": {
"politics": 5.2,
"sports": 80.1
},
"negative_reviews": {
"1star": 1,
"2star": 10
}
}
GET my-index-000001/_search
{
"query": {
"query": { <4>
"rank_feature": {
"field": "topics.politics"
}
}
}
GET my-index-000001/_search
{
"query": { <5>
"rank_feature": {
"field": "negative_reviews.1star"
}
}
}
--------------------------------------------------

<1> Rank features fields must use the `rank_features` field type
<2> Rank features fields must be a hash with string keys and strictly positive numeric values
<2> Rank features that correlate negatively with the score need to declare it
<3> Rank features fields must be a hash with string keys and strictly positive numeric values
<4> This query ranks documents by how much they are about the "politics" topic.
<5> This query ranks documents inversely to the number of "1star" reviews they received.


NOTE: `rank_features` fields only support single-valued features and strictly
positive values. Multi-valued fields and zero or negative values will be rejected.
Expand All @@ -63,3 +88,9 @@ only be queried using <<query-dsl-rank-feature-query,`rank_feature`>> queries.
NOTE: `rank_features` fields only preserve 9 significant bits for the
precision, which translates to a relative error of about 0.4%.

Rank features that correlate negatively with the score should set
`positive_score_impact` to `false` (defaults to `true`). This will be used by
the <<query-dsl-rank-feature-query,`rank_feature`>> query to modify the scoring formula
in such a way that the score decreases with the value of the feature instead of
increasing.

Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import org.elasticsearch.search.lookup.SearchLookup;

import java.io.IOException;
import java.util.Collections;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.function.Supplier;
Expand All @@ -30,8 +30,14 @@ public class RankFeaturesFieldMapper extends FieldMapper {

public static final String CONTENT_TYPE = "rank_features";

private static RankFeaturesFieldType ft(FieldMapper in) {
return ((RankFeaturesFieldMapper)in).fieldType();
}

public static class Builder extends FieldMapper.Builder {

private final Parameter<Boolean> positiveScoreImpact
= Parameter.boolParam("positive_score_impact", false, m -> ft(m).positiveScoreImpact, true);
private final Parameter<Map<String, String>> meta = Parameter.metaParam();

public Builder(String name) {
Expand All @@ -40,30 +46,37 @@ public Builder(String name) {

@Override
protected List<Parameter<?>> getParameters() {
return Collections.singletonList(meta);
return Arrays.asList(positiveScoreImpact, meta);
}

@Override
public RankFeaturesFieldMapper build(ContentPath contentPath) {
return new RankFeaturesFieldMapper(
name, new RankFeaturesFieldType(buildFullName(contentPath), meta.getValue()),
multiFieldsBuilder.build(this, contentPath), copyTo.build());
name, new RankFeaturesFieldType(buildFullName(contentPath), meta.getValue(), positiveScoreImpact.getValue()),
multiFieldsBuilder.build(this, contentPath), copyTo.build(), positiveScoreImpact.getValue());
}
}

public static final TypeParser PARSER = new TypeParser((n, c) -> new Builder(n));

public static final class RankFeaturesFieldType extends MappedFieldType {

public RankFeaturesFieldType(String name, Map<String, String> meta) {
private final boolean positiveScoreImpact;

public RankFeaturesFieldType(String name, Map<String, String> meta, boolean positiveScoreImpact) {
super(name, false, false, false, TextSearchInfo.NONE, meta);
this.positiveScoreImpact = positiveScoreImpact;
}

@Override
public String typeName() {
return CONTENT_TYPE;
}

public boolean positiveScoreImpact() {
return positiveScoreImpact;
}

@Override
public Query existsQuery(SearchExecutionContext context) {
throw new IllegalArgumentException("[rank_features] fields do not support [exists] queries");
Expand All @@ -85,9 +98,12 @@ public Query termQuery(Object value, SearchExecutionContext context) {
}
}

private final boolean positiveScoreImpact;

private RankFeaturesFieldMapper(String simpleName, MappedFieldType mappedFieldType,
MultiFields multiFields, CopyTo copyTo) {
MultiFields multiFields, CopyTo copyTo, boolean positiveScoreImpact) {
super(simpleName, mappedFieldType, Lucene.KEYWORD_ANALYZER, multiFields, copyTo);
this.positiveScoreImpact = positiveScoreImpact;
}

@Override
Expand Down Expand Up @@ -124,6 +140,9 @@ public void parse(ParseContext context) throws IOException {
throw new IllegalArgumentException("[rank_features] fields do not support indexing multiple values for the same " +
"rank feature [" + key + "] in the same document");
}
if (positiveScoreImpact == false) {
value = 1 / value;
}
context.doc().addWithKey(key, new FeatureField(name(), feature, value));
} else {
throw new IllegalArgumentException("[rank_features] fields take hashes that map a feature to a strictly positive " +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ protected void minimalMapping(XContentBuilder b) throws IOException {
}

@Override
protected void registerParameters(ParameterChecker checker) {
// no parameters to configure
protected void registerParameters(ParameterChecker checker) throws IOException {
checker.registerConflictCheck("positive_score_impact", b -> b.field("positive_score_impact", false));
}

@Override
Expand Down Expand Up @@ -79,6 +79,33 @@ public void testDefaults() throws Exception {
assertTrue(freq1 < freq2);
}

public void testNegativeScoreImpact() throws Exception {
DocumentMapper mapper = createDocumentMapper(
fieldMapping(b -> b.field("type", "rank_features").field("positive_score_impact", false))
);

ParsedDocument doc1 = mapper.parse(source(this::writeField));

IndexableField[] fields = doc1.rootDoc().getFields("field");
assertEquals(2, fields.length);
assertThat(fields[0], Matchers.instanceOf(FeatureField.class));
FeatureField featureField1 = null;
FeatureField featureField2 = null;
for (IndexableField field : fields) {
if (field.stringValue().equals("ten")) {
featureField1 = (FeatureField)field;
} else if (field.stringValue().equals("twenty")) {
featureField2 = (FeatureField)field;
} else {
throw new UnsupportedOperationException();
}
}

int freq1 = RankFeatureFieldMapperTests.getFrequency(featureField1.tokenStream(null, null));
int freq2 = RankFeatureFieldMapperTests.getFrequency(featureField2.tokenStream(null, null));
assertTrue(freq1 > freq2);
}

public void testRejectMultiValuedFields() throws MapperParsingException, IOException {
DocumentMapper mapper = createDocumentMapper(mapping(b -> {
b.startObject("field").field("type", "rank_features").endObject();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
public class RankFeaturesFieldTypeTests extends FieldTypeTestCase {

public void testIsNotAggregatable() {
MappedFieldType fieldType = new RankFeaturesFieldMapper.RankFeaturesFieldType("field", Collections.emptyMap());
MappedFieldType fieldType = new RankFeaturesFieldMapper.RankFeaturesFieldType("field", Collections.emptyMap(), true);
assertFalse(fieldType.isAggregatable());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@ setup:
properties:
tags:
type: rank_features
negative_reviews:
type: rank_features
positive_score_impact: false


- do:
index:
Expand All @@ -22,6 +26,9 @@ setup:
tags:
foo: 3
bar: 5
negative_reviews:
1star: 10
2star: 1

- do:
index:
Expand All @@ -31,6 +38,9 @@ setup:
tags:
bar: 6
quux: 10
negative_reviews:
1star: 1
2star: 10

- do:
indices.refresh: {}
Expand Down Expand Up @@ -129,3 +139,35 @@ setup:
hits.hits.1._id: "1"
- match:
hits.hits.1._score: 5.0


---
"Linear negative impact":

- do:
search:
index: test
body:
query:
rank_feature:
field: negative_reviews.1star
linear: {}

- match:
hits.hits.0._id: "2"
- match:
hits.hits.1._id: "1"

- do:
search:
index: test
body:
query:
rank_feature:
field: negative_reviews.2star
linear: {}

- match:
hits.hits.0._id: "1"
- match:
hits.hits.1._id: "2"

0 comments on commit f7cb422

Please sign in to comment.