Skip to content

Commit

Permalink
feat: add support for enabling ORDER BY on non projected columns (#1155)
Browse files Browse the repository at this point in the history
In response to PR #1147, we've introduced support for using ORDER BY on
non-projected columns. The following queries now work:

```
SELECT name FROM meme_images
ORDER BY Similarity(SiftFeatureExtractor(Open("{}")), SiftFeatureExtractor(data))
LIMIT 1;

SELECT FaceDetector(data) FROM MyVideo WHERE id < 5 order by scores;

SELECT AirForecast(12) order by y;

Select name FROM meme_images ORDER BY name;
```

The following query still won't work, because it orders by a projection alias:

`Select name AS n FROM meme_images ORDER BY n;`

Currently, we execute projection as the last operation. Only function
expressions are executed before projection, which allows them to be used
in ORDER BY. A complete fix will require us to redesign the binder. For
now, we've decided to defer this task because there is no urgency to
support it.

Ref:
https://github.com/duckdb/duckdb/blob/52a47a6b311a8aa008e7c78d8472eb52a937fa62/src/planner/expression_binder/order_binder.cpp#L68
  • Loading branch information
gaurav274 authored Sep 19, 2023
1 parent c559d0b commit 5dc2282
Show file tree
Hide file tree
Showing 4 changed files with 69 additions and 17 deletions.
6 changes: 2 additions & 4 deletions evadb/binder/binder_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,9 +363,7 @@ def drop_row_id_from_target_list(
return filtered_list


def add_func_expr_outputs_to_binder_context(
func_expr: FunctionExpression, binder_context: StatementBinderContext
):
def get_bound_func_expr_outputs_as_tuple_value_expr(func_expr: FunctionExpression):
output_cols = []
for obj, alias in zip(func_expr.output_objs, func_expr.alias.col_names):
col_alias = "{}.{}".format(func_expr.alias.alias_name, alias)
Expand All @@ -376,4 +374,4 @@ def add_func_expr_outputs_to_binder_context(
col_alias=col_alias,
)
output_cols.append(alias_obj)
binder_context.add_derived_table_alias(func_expr.alias.alias_name, output_cols)
return output_cols
12 changes: 9 additions & 3 deletions evadb/binder/statement_binder.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,13 @@

from evadb.binder.binder_utils import (
BinderError,
add_func_expr_outputs_to_binder_context,
bind_table_info,
check_column_name_is_string,
check_groupby_pattern,
check_table_object_is_groupable,
drop_row_id_from_target_list,
extend_star,
get_bound_func_expr_outputs_as_tuple_value_expr,
get_column_definition_from_select_target_list,
handle_bind_extract_object_function,
resolve_alias_table_value_expression,
Expand Down Expand Up @@ -201,7 +201,10 @@ def _bind_select_statement(self, node: SelectStatement):
for expr in node.target_list:
self.bind(expr)
if isinstance(expr, FunctionExpression):
add_func_expr_outputs_to_binder_context(expr, self._binder_context)
output_cols = get_bound_func_expr_outputs_as_tuple_value_expr(expr)
self._binder_context.add_derived_table_alias(
expr.alias.alias_name, output_cols
)

if node.groupby_clause:
self.bind(node.groupby_clause)
Expand Down Expand Up @@ -279,7 +282,10 @@ def _bind_tableref(self, node: TableRef):
func_expr = node.table_valued_expr.func_expr
func_expr.alias = node.alias
self.bind(func_expr)
add_func_expr_outputs_to_binder_context(func_expr, self._binder_context)
output_cols = get_bound_func_expr_outputs_as_tuple_value_expr(func_expr)
self._binder_context.add_derived_table_alias(
func_expr.alias.alias_name, output_cols
)
else:
raise BinderError(f"Unsupported node {type(node)}")

Expand Down
62 changes: 54 additions & 8 deletions evadb/optimizer/statement_to_opr_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from evadb.binder.binder_utils import get_bound_func_expr_outputs_as_tuple_value_expr
from evadb.expression.abstract_expression import AbstractExpression
from evadb.expression.function_expression import FunctionExpression
from evadb.optimizer.operators import (
LogicalCreate,
LogicalCreateFunction,
Expand Down Expand Up @@ -53,8 +55,8 @@
from evadb.parser.select_statement import SelectStatement
from evadb.parser.show_statement import ShowStatement
from evadb.parser.statement import AbstractStatement
from evadb.parser.table_ref import TableRef
from evadb.parser.types import FunctionType
from evadb.parser.table_ref import JoinNode, TableRef, TableValuedExpression
from evadb.parser.types import FunctionType, JoinType
from evadb.utils.logging_manager import logger


Expand Down Expand Up @@ -121,7 +123,54 @@ def visit_select(self, statement: SelectStatement):

# order of evaluation
# from, where, group by, select, order by, limit, union

# If there is a table_ref, an ORDER BY clause, and no GROUP BY clause, we move
# all the function expressions out of the projection list into table valued
# expressions. This is done to handle
# https://github.com/georgia-tech-db/evadb/issues/1147
# and https://github.com/georgia-tech-db/evadb/issues/1130.
# It is a bit ugly, but a complete fix would require modifying the binder.

col_with_func_exprs = []

if (
statement.from_table
and statement.orderby_list
and statement.groupby_clause is None
):
projection_cols = []
for col in statement.target_list:
if isinstance(col, FunctionExpression):
col_with_func_exprs.append(col)
# append the TupleValueExpression for the FunctionExpression
projection_cols.extend(
get_bound_func_expr_outputs_as_tuple_value_expr(col)
)
else:
projection_cols.append(col)

# update target list with projection cols
statement.target_list = projection_cols

table_ref = statement.from_table
if not table_ref and col_with_func_exprs:
# if there is no table source, we add a projection node with all the
# function expressions
self._visit_projection(col_with_func_exprs)
else:
# add col_with_func_exprs to TableValuedExpressions
for col in col_with_func_exprs:
tve = TableValuedExpression(col)
if table_ref:
table_ref = TableRef(
JoinNode(
table_ref,
TableRef(tve, alias=col.alias),
join_type=JoinType.LATERAL_JOIN,
)
)

statement.from_table = table_ref

if table_ref is not None:
self.visit_table_ref(table_ref)

Expand All @@ -135,18 +184,15 @@ def visit_select(self, statement: SelectStatement):
if statement.groupby_clause is not None:
self._visit_groupby(statement.groupby_clause)

# Projection operator
select_columns = statement.target_list

if select_columns is not None:
self._visit_projection(select_columns)

if statement.orderby_list is not None:
self._visit_orderby(statement.orderby_list)

if statement.limit_count is not None:
self._visit_limit(statement.limit_count)

if statement.target_list is not None:
self._visit_projection(statement.target_list)

# union
if statement.union_link is not None:
self._visit_union(statement.union_link, statement.union_all)
Expand Down
6 changes: 4 additions & 2 deletions test/integration_tests/short/test_select_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@

from evadb.binder.binder_utils import BinderError
from evadb.models.storage.batch import Batch
from evadb.optimizer.operators import LogicalFilter
from evadb.server.command_handler import execute_query_fetch_all

NUM_FRAMES = 10
Expand Down Expand Up @@ -416,9 +417,10 @@ def test_hash_join_with_multiple_on(self):

def test_expression_tree_signature(self):
plan = get_logical_query_plan(
self.evadb, "SELECT DummyMultiObjectDetector(data).labels FROM MyVideo"
self.evadb,
"SELECT id FROM MyVideo WHERE DummyMultiObjectDetector(data).labels @> ['person'];",
)
signature = plan.target_list[0].signature()
signature = next(plan.find_all(LogicalFilter)).predicate.children[0].signature()
function_id = (
self.evadb.catalog()
.get_function_catalog_entry_by_name("DummyMultiObjectDetector")
Expand Down

0 comments on commit 5dc2282

Please sign in to comment.