Skip to content

Commit

Permalink
use the correct type to widen the sort fields when merging top docs (#…
Browse files Browse the repository at this point in the history
…16881) (#16989)

* use the correct type to widen the sort fields when merging top docs



* fix



* apply commments



* changelog



* add more tests



---------


(cherry picked from commit bbcbd21)

Signed-off-by: panguixin <[email protected]>
Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
  • Loading branch information
1 parent 486f392 commit da323e5
Show file tree
Hide file tree
Showing 4 changed files with 152 additions and 23 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Indexed IP field supports `terms_query` with more than 1025 IP masks [#16391](https://github.com/opensearch-project/OpenSearch/pull/16391)
- Make entries for dependencies from server/build.gradle to gradle version catalog ([#16707](https://github.com/opensearch-project/OpenSearch/pull/16707))
- Allow extended plugins to be optional ([#16909](https://github.com/opensearch-project/OpenSearch/pull/16909))
- Use the correct type to widen the sort fields when merging top docs ([#16881](https://github.com/opensearch-project/OpenSearch/pull/16881))

### Deprecated
- Performing update operation with default pipeline or final pipeline is deprecated ([#16712](https://github.com/opensearch-project/OpenSearch/pull/16712))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
import org.opensearch.common.Numbers;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.xcontent.XContentFactory;
import org.opensearch.common.xcontent.XContentType;
import org.opensearch.core.rest.RestStatus;
import org.opensearch.core.xcontent.MediaTypeRegistry;
import org.opensearch.core.xcontent.XContentBuilder;
Expand All @@ -63,6 +64,7 @@
import org.opensearch.search.SearchHit;
import org.opensearch.search.SearchHits;
import org.opensearch.test.InternalSettingsPlugin;
import org.opensearch.test.OpenSearchTestCase;
import org.opensearch.test.ParameterizedDynamicSettingsOpenSearchIntegTestCase;
import org.hamcrest.Matchers;

Expand All @@ -82,7 +84,9 @@
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Function;
import java.util.function.Supplier;

import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.opensearch.index.query.QueryBuilders.functionScoreQuery;
Expand Down Expand Up @@ -2609,4 +2613,99 @@ public void testSimpleSortsPoints() throws Exception {

assertThat(searchResponse.toString(), not(containsString("error")));
}

public void testSortMixedIntegerNumericFields() throws Exception {
internalCluster().ensureAtLeastNumDataNodes(3);
AtomicInteger counter = new AtomicInteger();
index("long", () -> Long.MAX_VALUE - counter.getAndIncrement());
index("integer", () -> Integer.MAX_VALUE - counter.getAndIncrement());
SearchResponse searchResponse = client().prepareSearch("long", "integer")
.setQuery(matchAllQuery())
.setSize(10)
.addSort(SortBuilders.fieldSort("field").order(SortOrder.ASC).sortMode(SortMode.MAX))
.get();
assertNoFailures(searchResponse);
long[] sortValues = new long[10];
for (int i = 0; i < 10; i++) {
sortValues[i] = ((Number) searchResponse.getHits().getAt(i).getSortValues()[0]).longValue();
}
for (int i = 1; i < 10; i++) {
assertThat(Arrays.toString(sortValues), sortValues[i - 1], lessThan(sortValues[i]));
}
}

public void testSortMixedFloatingNumericFields() throws Exception {
internalCluster().ensureAtLeastNumDataNodes(3);
AtomicInteger counter = new AtomicInteger();
index("double", () -> 100.5 - counter.getAndIncrement());
counter.set(0);
index("float", () -> 200.5 - counter.getAndIncrement());
counter.set(0);
index("half_float", () -> 300.5 - counter.getAndIncrement());
SearchResponse searchResponse = client().prepareSearch("double", "float", "half_float")
.setQuery(matchAllQuery())
.setSize(15)
.addSort(SortBuilders.fieldSort("field").order(SortOrder.ASC).sortMode(SortMode.MAX))
.get();
assertNoFailures(searchResponse);
double[] sortValues = new double[15];
for (int i = 0; i < 15; i++) {
sortValues[i] = ((Number) searchResponse.getHits().getAt(i).getSortValues()[0]).doubleValue();
}
for (int i = 1; i < 15; i++) {
assertThat(Arrays.toString(sortValues), sortValues[i - 1], lessThan(sortValues[i]));
}
}

public void testSortMixedFloatingAndIntegerNumericFields() throws Exception {
internalCluster().ensureAtLeastNumDataNodes(3);
index("long", () -> randomLongBetween(0, (long) 2E53 - 1));
index("integer", OpenSearchTestCase::randomInt);
index("double", OpenSearchTestCase::randomDouble);
index("float", () -> randomFloat());
boolean asc = randomBoolean();
SearchResponse searchResponse = client().prepareSearch("long", "integer", "double", "float")
.setQuery(matchAllQuery())
.setSize(20)
.addSort(SortBuilders.fieldSort("field").order(asc ? SortOrder.ASC : SortOrder.DESC).sortMode(SortMode.MAX))
.get();
assertNoFailures(searchResponse);
double[] sortValues = new double[20];
for (int i = 0; i < 20; i++) {
sortValues[i] = ((Number) searchResponse.getHits().getAt(i).getSortValues()[0]).doubleValue();
}
if (asc) {
for (int i = 1; i < 20; i++) {
assertThat(Arrays.toString(sortValues), sortValues[i - 1], lessThanOrEqualTo(sortValues[i]));
}
} else {
for (int i = 1; i < 20; i++) {
assertThat(Arrays.toString(sortValues), sortValues[i - 1], greaterThanOrEqualTo(sortValues[i]));
}
}
}

private void index(String type, Supplier<Number> valueSupplier) throws Exception {
assertAcked(
prepareCreate(type).setMapping(
XContentFactory.jsonBuilder()
.startObject()
.startObject("properties")
.startObject("field")
.field("type", type)
.endObject()
.endObject()
.endObject()
).setSettings(Settings.builder().put("index.number_of_shards", 3).put("index.number_of_replicas", 0))
);
ensureGreen(type);
for (int i = 0; i < 5; i++) {
client().prepareIndex(type)
.setId(Integer.toString(i))
.setSource("{\"field\" : " + valueSupplier.get() + " }", XContentType.JSON)
.get();
}
client().admin().indices().prepareRefresh(type).get();
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
import org.opensearch.common.lucene.search.TopDocsAndMaxScore;
import org.opensearch.core.common.breaker.CircuitBreaker;
import org.opensearch.core.common.io.stream.NamedWriteableRegistry;
import org.opensearch.index.fielddata.IndexFieldData;
import org.opensearch.search.DocValueFormat;
import org.opensearch.search.SearchHit;
import org.opensearch.search.SearchHits;
Expand Down Expand Up @@ -604,36 +605,51 @@ private static void validateMergeSortValueFormats(Collection<? extends SearchPha
* support sort optimization, we removed type widening there and taking care here during merging.
* More details here https://github.com/opensearch-project/OpenSearch/issues/6326
*/
// TODO: should we check the compatibility between types
private static Sort createSort(TopFieldDocs[] topFieldDocs) {
final SortField[] firstTopDocFields = topFieldDocs[0].fields;
final SortField[] newFields = new SortField[firstTopDocFields.length];
for (int fieldIndex = 0; fieldIndex < firstTopDocFields.length; fieldIndex++) {
SortField.Type firstType = getSortType(firstTopDocFields[fieldIndex]);
newFields[fieldIndex] = firstTopDocFields[fieldIndex];
if (SortedWiderNumericSortField.isTypeSupported(firstType) == false) {
continue;
}

for (int i = 0; i < firstTopDocFields.length; i++) {
final SortField delegate = firstTopDocFields[i];
final SortField.Type type = delegate instanceof SortedNumericSortField
? ((SortedNumericSortField) delegate).getNumericType()
: delegate.getType();
boolean requireWiden = false;
boolean isFloat = firstType == SortField.Type.FLOAT || firstType == SortField.Type.DOUBLE;
for (int shardIndex = 1; shardIndex < topFieldDocs.length; shardIndex++) {
final SortField sortField = topFieldDocs[shardIndex].fields[fieldIndex];
SortField.Type sortType = getSortType(sortField);
if (SortedWiderNumericSortField.isTypeSupported(sortType) == false) {
// throw exception if sortType is not CUSTOM?
// skip this shard or do not widen?
requireWiden = false;
break;
}
requireWiden = requireWiden || sortType != firstType;
isFloat = isFloat || sortType == SortField.Type.FLOAT || sortType == SortField.Type.DOUBLE;
}

if (SortedWiderNumericSortField.isTypeSupported(type) && isSortWideningRequired(topFieldDocs, i)) {
newFields[i] = new SortedWiderNumericSortField(delegate.getField(), type, delegate.getReverse());
} else {
newFields[i] = firstTopDocFields[i];
if (requireWiden) {
newFields[fieldIndex] = new SortedWiderNumericSortField(
firstTopDocFields[fieldIndex].getField(),
isFloat ? SortField.Type.DOUBLE : SortField.Type.LONG,
firstTopDocFields[fieldIndex].getReverse()
);
}
}
return new Sort(newFields);
}

/**
* It will compare respective SortField between shards to see if any shard results have different
* field mapping type, accordingly it will decide to widen the sort fields.
*/
private static boolean isSortWideningRequired(TopFieldDocs[] topFieldDocs, int sortFieldindex) {
for (int i = 0; i < topFieldDocs.length - 1; i++) {
if (!topFieldDocs[i].fields[sortFieldindex].equals(topFieldDocs[i + 1].fields[sortFieldindex])) {
return true;
}
private static SortField.Type getSortType(SortField sortField) {
if (sortField.getComparatorSource() instanceof IndexFieldData.XFieldComparatorSource) {
return ((IndexFieldData.XFieldComparatorSource) sortField.getComparatorSource()).reducedType();
} else {
return sortField instanceof SortedNumericSortField
? ((SortedNumericSortField) sortField).getNumericType()
: sortField.getType();
}
return false;
}

/*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,18 @@
import org.apache.lucene.search.comparators.NumericComparator;

import java.io.IOException;
import java.util.Comparator;

/**
* Sorted numeric field for wider sort types,
* to help sorting two different numeric types.
* Sorted numeric field for wider sort types, to help sorting two different numeric types.
* NOTE: the unsigned_long is not supported by widening sort since the unsigned_long could not be used with other types
*
* @opensearch.internal
*/
public class SortedWiderNumericSortField extends SortedNumericSortField {
private final int byteCounts;
private final Comparator<Number> comparator;

/**
* Creates a sort, possibly in reverse, specifying how the sort value from the document's set is
* selected.
Expand All @@ -39,6 +43,15 @@ public class SortedWiderNumericSortField extends SortedNumericSortField {
*/
public SortedWiderNumericSortField(String field, Type type, boolean reverse) {
super(field, type, reverse);
if (type == Type.LONG) {
byteCounts = Long.BYTES;
comparator = Comparator.comparingLong(Number::longValue);
} else if (type == Type.DOUBLE) {
byteCounts = Double.BYTES;
comparator = Comparator.comparingDouble(Number::doubleValue);
} else {
throw new IllegalArgumentException("Unsupported numeric type: " + type);
}
}

/**
Expand All @@ -51,7 +64,7 @@ public SortedWiderNumericSortField(String field, Type type, boolean reverse) {
*/
@Override
public FieldComparator<?> getComparator(int numHits, Pruning pruning) {
return new NumericComparator<Number>(getField(), (Number) getMissingValue(), getReverse(), pruning, Double.BYTES) {
return new NumericComparator<Number>(getField(), (Number) getMissingValue(), getReverse(), pruning, byteCounts) {
@Override
public int compare(int slot1, int slot2) {
throw new UnsupportedOperationException();
Expand All @@ -78,7 +91,7 @@ public int compareValues(Number first, Number second) {
} else if (second == null) {
return 1;
} else {
return Double.compare(first.doubleValue(), second.doubleValue());
return comparator.compare(first, second);
}
}
};
Expand Down

0 comments on commit da323e5

Please sign in to comment.