Skip to content

Commit

Permalink
SQL: Fix ORDER BY on aggregates and GROUPed BY fields (#51894)
Browse files Browse the repository at this point in the history
Previously, in the in-memory sorting module
`LocalAggregationSorterListener` only the aggregate functions where used
(grabbed by the `sortingColumns`). As a consequence, if the ORDER BY
was also using columns of the GROUP BY clause, (especially in the case
of higher priority - before the aggregate functions) wrong results were
produced. E.g.:
```
SELECT gender, MAX(salary) AS max FROM test_emp
GROUP BY gender
ORDER BY gender, max
```

Add all columns of the ORDER BY to the `sortingColumns` so that the
`LocalAggregationSorterListener` can use the correct comparators in
the underlying PriorityQueue used to implement the in-memory sorting.

Fixes: #50355
(cherry picked from commit be680af)
  • Loading branch information
matriv committed Feb 12, 2020
1 parent a53e4aa commit 66e1092
Show file tree
Hide file tree
Showing 9 changed files with 281 additions and 101 deletions.
48 changes: 47 additions & 1 deletion x-pack/plugin/sql/qa/src/main/resources/agg-ordering.csv-spec
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,50 @@ g:s | gender:s | s3:i | SUM(salary):i | s5:i
M |M |2671054|2671054 |2671054
F |F |1666196|1666196 |1666196
null |null |487605 |487605 |487605
;
;

histogramDateTimeWithCountAndOrder_1
schema::h:ts|c:l
SELECT HISTOGRAM(birth_date, INTERVAL 1 YEAR) AS h, COUNT(*) as c FROM test_emp GROUP BY h ORDER BY h DESC, c ASC;

h | c
------------------------+---------------
1965-01-01T00:00:00.000Z|1
1964-01-01T00:00:00.000Z|4
1963-01-01T00:00:00.000Z|7
1962-01-01T00:00:00.000Z|6
1961-01-01T00:00:00.000Z|8
1960-01-01T00:00:00.000Z|8
1959-01-01T00:00:00.000Z|9
1958-01-01T00:00:00.000Z|7
1957-01-01T00:00:00.000Z|4
1956-01-01T00:00:00.000Z|5
1955-01-01T00:00:00.000Z|4
1954-01-01T00:00:00.000Z|8
1953-01-01T00:00:00.000Z|11
1952-01-01T00:00:00.000Z|8
null |10
;

histogramDateTimeWithCountAndOrder_2
schema::h:ts|c:l
SELECT HISTOGRAM(birth_date, INTERVAL 1 YEAR) AS h, COUNT(*) as c FROM test_emp GROUP BY h ORDER BY c DESC, h ASC;

h | c
------------------------+---------------
1953-01-01T00:00:00.000Z|11
null |10
1959-01-01T00:00:00.000Z|9
1952-01-01T00:00:00.000Z|8
1954-01-01T00:00:00.000Z|8
1960-01-01T00:00:00.000Z|8
1961-01-01T00:00:00.000Z|8
1958-01-01T00:00:00.000Z|7
1963-01-01T00:00:00.000Z|7
1962-01-01T00:00:00.000Z|6
1956-01-01T00:00:00.000Z|5
1955-01-01T00:00:00.000Z|4
1957-01-01T00:00:00.000Z|4
1964-01-01T00:00:00.000Z|4
1965-01-01T00:00:00.000Z|1
;
27 changes: 24 additions & 3 deletions x-pack/plugin/sql/qa/src/main/resources/agg-ordering.sql-spec
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ aggNotSpecifiedInTheAggregateAndGroupWithHavingWithLimitAndDirection
SELECT gender, MIN(salary) AS min, COUNT(*) AS c FROM test_emp GROUP BY gender HAVING c > 1 ORDER BY MAX(salary) ASC, c DESC LIMIT 5;

groupAndAggNotSpecifiedInTheAggregateWithHaving
SELECT gender, MIN(salary) AS min, COUNT(*) AS c FROM test_emp GROUP BY gender HAVING c > 1 ORDER BY gender, MAX(salary);
SELECT gender, MIN(salary) AS min, COUNT(*) AS c FROM test_emp GROUP BY gender HAVING c > 1 ORDER BY gender NULLS FIRST, MAX(salary);

multipleAggsThatGetRewrittenWithAliasOnAMediumGroupBy
SELECT languages, MAX(salary) AS max, MIN(salary) AS min FROM test_emp GROUP BY languages ORDER BY max;
Expand Down Expand Up @@ -136,5 +136,26 @@ SELECT gender AS g, first_name AS f, last_name AS l FROM test_emp GROUP BY f, ge
multipleGroupingsAndOrderingByGroups_8
SELECT gender AS g, first_name, last_name FROM test_emp GROUP BY g, last_name, first_name ORDER BY gender ASC, first_name DESC, last_name ASC;

multipleGroupingsAndOrderingByGroupsWithFunctions
SELECT first_name f, last_name l, gender g, CONCAT(first_name, last_name) c FROM test_emp GROUP BY gender, l, f, c ORDER BY gender, c DESC, first_name, last_name ASC;
multipleGroupingsAndOrderingByGroupsAndAggs_1
SELECT gender, MIN(salary) AS min, COUNT(*) AS c, MAX(salary) AS max FROM test_emp GROUP BY gender HAVING c > 1 ORDER BY gender ASC NULLS FIRST, MAX(salary) DESC;

multipleGroupingsAndOrderingByGroupsAndAggs_2
SELECT gender, MIN(salary) AS min, COUNT(*) AS c, MAX(salary) AS max FROM test_emp GROUP BY gender HAVING c > 1 ORDER BY gender DESC NULLS LAST, MAX(salary) ASC;

multipleGroupingsAndOrderingByGroupsWithFunctions_1
SELECT first_name f, last_name l, gender g, CONCAT(first_name, last_name) c FROM test_emp GROUP BY gender, l, f, c ORDER BY gender NULLS FIRST, c DESC, first_name, last_name ASC;

multipleGroupingsAndOrderingByGroupsWithFunctions_2
SELECT first_name f, last_name l, gender g, CONCAT(first_name, last_name) c FROM test_emp GROUP BY gender, l, f, c ORDER BY c DESC, gender DESC NULLS LAST, first_name, last_name ASC;

multipleGroupingsAndOrderingByGroupsAndAggregatesWithFunctions_1
SELECT CONCAT('foo', gender) g, MAX(salary) AS max, MIN(salary) AS min FROM test_emp GROUP BY g ORDER BY 1 NULLS FIRST, 2, 3;

multipleGroupingsAndOrderingByGroupsAndAggregatesWithFunctions_2
SELECT CONCAT('foo', gender) g, MAX(salary) AS max, MIN(salary) AS min FROM test_emp GROUP BY g ORDER BY 1 DESC NULLS LAST, 2, 3;

multipleGroupingsAndOrderingByGroupsAndAggregatesWithFunctions_3
SELECT CONCAT('foo', gender) g, MAX(salary) AS max, MIN(salary) AS min FROM test_emp GROUP BY g ORDER BY 2, 1 NULLS FIRST, 3;

multipleGroupingsAndOrderingByGroupsAndAggregatesWithFunctions_4
SELECT CONCAT('foo', gender) g, MAX(salary) AS max, MIN(salary) AS min FROM test_emp GROUP BY g ORDER BY 3 DESC, 1 NULLS FIRST, 2;
Original file line number Diff line number Diff line change
Expand Up @@ -605,36 +605,51 @@ static class AggSortingQueue extends PriorityQueue<Tuple<List<?>, Integer>> {
this.sortingColumns = sortingColumns;
}

// compare row based on the received attribute sort
// if a sort item is not in the list, it is assumed the sorting happened in ES
// and the results are left as is (by using the row ordering), otherwise it is sorted based on the given criteria.
//
// Take for example ORDER BY a, x, b, y
// a, b - are sorted in ES
// x, y - need to be sorted client-side
// sorting on x kicks in, only if the values for a are equal.

/**
* Compare row based on the received attribute sort
* <ul>
* <li>
* If a tuple in {@code sortingColumns} has a null comparator, it is assumed the sorting
* happened in ES and the results are left as is (by using the row ordering), otherwise it is
* sorted based on the given criteria.
* </li>
* <li>
* If no tuple exists in {@code sortingColumns} for an output column, it means this column
* is not included at all in the ORDER BY
* </li>
*</ul>
*
* Take for example ORDER BY a, x, b, y
* a, b - are sorted in ES
* x, y - need to be sorted client-side
* sorting on x kicks in only if the values for a are equal.
* sorting on y kicks in only if the values for a, x and b are all equal
*
*/
// thanks to @jpountz for the row ordering idea as a way to preserve ordering
@SuppressWarnings("unchecked")
@Override
protected boolean lessThan(Tuple<List<?>, Integer> l, Tuple<List<?>, Integer> r) {
for (Tuple<Integer, Comparator> tuple : sortingColumns) {
int i = tuple.v1().intValue();
int columnIdx = tuple.v1().intValue();
Comparator comparator = tuple.v2();

Object vl = l.v1().get(i);
Object vr = r.v1().get(i);
// Get the values for left and right rows at the current column index
Object vl = l.v1().get(columnIdx);
Object vr = r.v1().get(columnIdx);
if (comparator != null) {
int result = comparator.compare(vl, vr);
// if things are equals, move to the next comparator
// if things are not equal: return the comparison result,
// otherwise: move to the next comparator to solve the tie.
if (result != 0) {
return result > 0;
}
}
// no comparator means the existing order needs to be preserved
// no comparator means the rows are pre-ordered by ES for the column at
// the current index and the existing order needs to be preserved
else {
// check the values - if they are equal move to the next comparator
// otherwise return the row order
// check the values - if they are not equal return the row order
// otherwise: move to the next comparator to solve the tie.
if (Objects.equals(vl, vr) == false) {
return l.v2().compareTo(r.v2()) > 0;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ private static void sorting(QueryContainer container, SearchSourceBuilder source
source.sort("_doc");
return;
}
for (Sort sortable : container.sort()) {
for (Sort sortable : container.sort().values()) {
SortBuilder<?> sortBuilder = null;

if (sortable instanceof AttributeSort) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@
import org.elasticsearch.xpack.sql.querydsl.container.GlobalCountRef;
import org.elasticsearch.xpack.sql.querydsl.container.GroupByRef;
import org.elasticsearch.xpack.sql.querydsl.container.GroupByRef.Property;
import org.elasticsearch.xpack.sql.querydsl.container.GroupingFunctionSort;
import org.elasticsearch.xpack.sql.querydsl.container.MetricAggRef;
import org.elasticsearch.xpack.sql.querydsl.container.PivotColumnRef;
import org.elasticsearch.xpack.sql.querydsl.container.QueryContainer;
Expand Down Expand Up @@ -681,37 +682,36 @@ protected PhysicalPlan rule(OrderExec plan) {

// TODO: might need to validate whether the target field or group actually exist
if (group != null && group != Aggs.IMPLICIT_GROUP_KEY) {
// check whether the lookup matches a group
if (group.id().equals(lookup)) {
qContainer = qContainer.updateGroup(group.with(direction));
}
// else it's a leafAgg
else {
qContainer = qContainer.updateGroup(group.with(direction));
}
qContainer = qContainer.updateGroup(group.with(direction));
}

// field
if (orderExpression instanceof FieldAttribute) {
qContainer = qContainer.addSort(lookup,
new AttributeSort((FieldAttribute) orderExpression, direction, missing));
}
// scalar functions typically require script ordering
else if (orderExpression instanceof ScalarFunction) {
ScalarFunction sf = (ScalarFunction) orderExpression;
// nope, use scripted sorting
qContainer = qContainer.addSort(lookup, new ScriptSort(sf.asScript(), direction, missing));
}
// histogram
else if (orderExpression instanceof Histogram) {
qContainer = qContainer.addSort(lookup, new GroupingFunctionSort(direction, missing));
}
// score
else if (orderExpression instanceof Score) {
qContainer = qContainer.addSort(lookup, new ScoreSort(direction, missing));
}
// agg function
else if (orderExpression instanceof AggregateFunction) {
qContainer = qContainer.addSort(lookup,
new AggregateSort((AggregateFunction) orderExpression, direction, missing));
}
// unknown
else {
// scalar functions typically require script ordering
if (orderExpression instanceof ScalarFunction) {
ScalarFunction sf = (ScalarFunction) orderExpression;
// nope, use scripted sorting
qContainer = qContainer.addSort(new ScriptSort(sf.asScript(), direction, missing));
}
// score
else if (orderExpression instanceof Score) {
qContainer = qContainer.addSort(new ScoreSort(direction, missing));
}
// field
else if (orderExpression instanceof FieldAttribute) {
qContainer = qContainer.addSort(new AttributeSort((FieldAttribute) orderExpression, direction, missing));
}
// agg function
else if (orderExpression instanceof AggregateFunction) {
qContainer = qContainer.addSort(new AggregateSort((AggregateFunction) orderExpression, direction, missing));
} else {
// unknown
throw new SqlIllegalArgumentException("unsupported sorting expression {}", orderExpression);
}
throw new SqlIllegalArgumentException("unsupported sorting expression {}", orderExpression);
}
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License;
* you may not use this file except in compliance with the Elastic License.
*/
package org.elasticsearch.xpack.sql.querydsl.container;

import java.util.Objects;

public class GroupingFunctionSort extends Sort {

public GroupingFunctionSort(Direction direction, Missing missing) {
super(direction, missing);
}

@Override
public int hashCode() {
return Objects.hash(direction(), missing());
}

@Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
}

if (obj == null || getClass() != obj.getClass()) {
return false;
}

GroupingFunctionSort other = (GroupingFunctionSort) obj;
return Objects.equals(direction(), other.direction())
&& Objects.equals(missing(), other.missing());
}
}
Loading

0 comments on commit 66e1092

Please sign in to comment.