Add typechecks to Aggregate and Cross Tab (#6380)

Follow up of #6298 as it grew too much. Adds the needed typechecks to aggregate operations. Ensures that the DB operations report `Floating_Point_Equality` warning consistently with in-memory.
enso-org · Apr 24, 2023 · a43d524 · a43d524
1 parent 719bd8c
commit a43d524
Show file tree

Hide file tree

Showing 11 changed files with 208 additions and 137 deletions.
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso
@@ -1220,6 +1220,8 @@ type Table
          - If there are no columns in the output table, a `No_Output_Columns` is
            raised as an error regardless of the problem behavior, because it is
            not possible to create a table without any columns.
+         - If a given aggregate is not supported by the backend,
+           `Unsupported_Database_Operation` is reported.
          - If a column index is out of range, a `Column_Indexes_Out_Of_Range` is
            reported according to the `on_problems` setting, unless
            `error_on_missing_columns` is set to `True`, in which case it is
@@ -1256,11 +1258,17 @@ type Table
     aggregate : Vector Aggregate_Column -> Boolean -> Problem_Behavior -> Table ! No_Output_Columns | Invalid_Aggregate_Column | Invalid_Output_Column_Names | Duplicate_Output_Column_Names | Floating_Point_Equality | Invalid_Aggregation | Unquoted_Delimiter | Additional_Warnings
     aggregate self columns (error_on_missing_columns=False) (on_problems=Report_Warning) =
         validated = Aggregate_Column_Helper.prepare_aggregate_columns columns self error_on_missing_columns=error_on_missing_columns
-        on_problems.attach_problems_before validated.problems <|
-            key_columns = validated.key_columns
+        key_columns = validated.key_columns
+        key_problems = key_columns.flat_map internal_column->
+            column = self.make_column internal_column
+            case column.value_type.is_floating_point of
+                True -> [Floating_Point_Equality.Error column.name]
+                False -> []
+        on_problems.attach_problems_before validated.problems+key_problems <|
             resolved_aggregates = validated.valid_columns
             key_expressions = key_columns.map .expression
             new_ctx = self.context.set_groups key_expressions
+            problem_builder = Problem_Builder.new
             ## TODO [RW] here we will perform as many fetches as there are
                aggregate columns, but technically we could perform just one
                fetch fetching all column types - TODO we should do that. We can
@@ -1269,15 +1277,15 @@ type Table
                point to a single query.
                See #6118.
             infer_from_database_callback expression =
-                    SQL_Type_Reference.new self.connection self.context expression
+                SQL_Type_Reference.new self.connection self.context expression
             dialect = self.connection.dialect
             type_mapping = dialect.get_type_mapping
             infer_return_type op_kind columns expression =
                 type_mapping.infer_return_type infer_from_database_callback op_kind columns expression
             results = resolved_aggregates.map p->
                 agg = p.second
                 new_name = p.first
-                result = Aggregate_Helper.make_aggregate_column agg new_name dialect infer_return_type
+                result = Aggregate_Helper.make_aggregate_column self agg new_name dialect infer_return_type problem_builder
                 ## If the `result` did contain an error, we catch it to be
                    able to store it in a vector and then we will partition the
                    created columns and failures.
@@ -1291,9 +1299,11 @@ type Table
                the `lift_aggregate` method to push the aggregates into a
                subquery.
             new_columns = partitioned.second
-            problems = partitioned.first.map .value
-            on_problems.attach_problems_before problems <|
-                self.updated_context_and_columns new_ctx new_columns subquery=True
+            problem_builder.attach_problems_before on_problems <|
+                problems = partitioned.first.map .value
+                on_problems.attach_problems_before problems <|
+                    if new_columns.is_empty then (Error.throw No_Output_Columns) else
+                        self.updated_context_and_columns new_ctx new_columns subquery=True
 
     ## Returns a new table with a chosen subset of columns left unchanged and
        the other columns pivoted to rows with a single name field and a single

diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Aggregate_Helper.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/Aggregate_Helper.enso
@@ -2,7 +2,9 @@ from Standard.Base import all hiding First, Last
 import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
 
 import Standard.Table.Data.Aggregate_Column.Aggregate_Column
+import Standard.Table.Internal.Problem_Builder.Problem_Builder
 from Standard.Table.Data.Aggregate_Column.Aggregate_Column import all
+from Standard.Table.Errors import Floating_Point_Equality
 
 import project.Data.Dialect.Dialect
 import project.Data.Table.Table
@@ -16,14 +18,16 @@ from project.Errors import Unsupported_Database_Operation
    Creates an `Internal_Column` that will represent the computed aggregate.
 
    Arguments:
+   - table: The table owning the columns used in the aggregation.
    - aggregate: The description of the aggregation to compute.
    - new_name: The name for the created column.
    - dialect: The dialect of the database to generate the SQL for.
    - infer_return_type: A function that takes 3 arguments (name of the
      operation, list of input columns and a raw SQL IR Expression) and returns
      the inferred type for the aggregation.
-make_aggregate_column : Aggregate_Column -> Text -> Dialect -> (Any -> Any -> Any -> SQL_Type_Reference) -> SQL_Expression
-make_aggregate_column aggregate new_name dialect infer_return_type =
+   - problem_builder: A `Problem_Builder` instance used for reporting warnings.
+make_aggregate_column : Table -> Aggregate_Column -> Text -> Dialect -> (Any -> Any -> Any -> SQL_Type_Reference) -> Problem_Builder -> SQL_Expression
+make_aggregate_column table aggregate new_name dialect infer_return_type problem_builder =
     is_non_empty_selector v = v.is_nothing.not && v.not_empty
     simple_aggregate op_kind columns =
         expression = SQL_Expression.Operation op_kind (columns.map .expression)
@@ -55,7 +59,11 @@ make_aggregate_column aggregate new_name dialect infer_return_type =
             expression = SQL_Expression.Operation op_kind [SQL_Expression.Constant p, c.expression]
             sql_type_ref = infer_return_type op_kind [c] expression
             Internal_Column.Value new_name sql_type_ref expression
-        Mode c _ -> simple_aggregate "MODE" [c]
+        Mode c _ ->
+            col = table.make_column c
+            if col.value_type.is_floating_point then
+                problem_builder.report_other_warning (Floating_Point_Equality.Error new_name)
+            simple_aggregate "MODE" [c]
         First c _ ignore_nothing order_by -> case is_non_empty_selector order_by of
             False -> Error.throw (Unsupported_Database_Operation.Error "`First` aggregation requires at least one `order_by` column.")
             True ->

diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso
@@ -1756,12 +1756,13 @@ type Table
 
         problem_builder.attach_problems_before on_problems <| Illegal_Argument.handle_java_exception <|
             java_key_columns = grouping.map .java_column
-            index  = self.java_table.indexFromColumns java_key_columns.to_array
+            index = self.java_table.indexFromColumns java_key_columns.to_array
 
             name_mapper = if matched_name.is_empty then Aggregate_Column_Helper.default_aggregate_column_name else
                 if validated_values.length == 1 then (_ -> "") else
                     all_same = Aggregate_Column_Helper.all_same_column validated_values
-                    c -> Aggregate_Column_Helper.default_aggregate_column_name c all_same
+                    include_column_name = all_same.not
+                    c -> Aggregate_Column_Helper.default_aggregate_column_name c include_column_name
 
             data_columns = validated_values.map c->
                 col_name = c.new_name.if_nothing <|

diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso
@@ -1,15 +1,15 @@
 from Standard.Base import all hiding First, Last
 
+import project.Data.Aggregate_Column.Aggregate_Column
 import project.Data.Column.Column
 import project.Data.Column_Selector.Column_Selector
 import project.Data.Sort_Column.Sort_Column
+import project.Data.Table.Table
+import project.Data.Type.Value_Type.Value_Type
 import project.Internal.Problem_Builder.Problem_Builder
 import project.Internal.Table_Helpers
 import project.Internal.Unique_Name_Strategy.Unique_Name_Strategy
-
-import Standard.Table.Data.Aggregate_Column.Aggregate_Column
 from project.Data.Aggregate_Column.Aggregate_Column import all
-import project.Data.Table.Table
 from project.Errors import No_Output_Columns, Duplicate_Output_Column_Names, Invalid_Output_Column_Names, Invalid_Aggregation
 
 polyglot java import org.enso.table.aggregations.Aggregator
@@ -107,7 +107,7 @@ default_aggregate_column_name aggregate_column include_column=True =
         _ ->
             prefix = Meta.get_simple_type_name aggregate_column . replace "_" " "
             c = aggregate_column.column
-            prefix + " " + (if include_column then c.name else "")
+            prefix + (if include_column then " " + c.name else "")
 
 ## PRIVATE
    Utility function to check if all aggregates are operating on the same source column.
@@ -128,6 +128,8 @@ all_same_column aggregates =
    indices or column references potentially from a different table) are
    replaced with column references from the provided table.
 
+   It also verifies that the columns have the right types.
+
    `Sort_Column`s are replaced with column references of matched columns coming
    from the provided table.
 
@@ -160,6 +162,18 @@ resolve_aggregate table problem_builder aggregate_column =
                 Internal_Order_By_Column_Reference.Value c.column c.associated_selector.direction
             sort_columns
 
+    resolve_numeric c =
+        internal_column = resolve c
+        col = columns_helper.make_column internal_column
+        Value_Type.expect_numeric col <|
+            internal_column
+
+    resolve_text c =
+        internal_column = resolve c
+        col = columns_helper.make_column internal_column
+        Value_Type.expect_text col <|
+            internal_column
+
     result = case aggregate_column of
         Group_By c new_name -> Group_By (resolve c) new_name
         Count new_name -> Count new_name
@@ -168,21 +182,21 @@ resolve_aggregate table problem_builder aggregate_column =
             Count_Distinct new_c new_name ignore_nothing
         Count_Not_Nothing c new_name -> Count_Not_Nothing (resolve c) new_name
         Count_Nothing c new_name -> Count_Nothing (resolve c) new_name
-        Count_Not_Empty c new_name -> Count_Not_Empty (resolve c) new_name
-        Count_Empty c new_name ->  Count_Empty (resolve c) new_name
-        Sum c new_name ->  Sum (resolve c) new_name
-        Average c new_name -> Average (resolve c) new_name
-        Median c new_name -> Median (resolve c) new_name
-        Percentile p c new_name -> Percentile p (resolve c) new_name
+        Count_Not_Empty c new_name -> Count_Not_Empty (resolve_text c) new_name
+        Count_Empty c new_name ->  Count_Empty (resolve_text c) new_name
+        Sum c new_name -> Sum (resolve_numeric c) new_name
+        Average c new_name -> Average (resolve_numeric c) new_name
+        Median c new_name -> Median (resolve_numeric c) new_name
+        Percentile p c new_name -> Percentile p (resolve_numeric c) new_name
         Mode c new_name -> Mode (resolve c) new_name
-        Standard_Deviation c new_name population -> Standard_Deviation (resolve c) new_name population
-        Concatenate c new_name separator prefix suffix quote_char -> Concatenate (resolve c) new_name separator prefix suffix quote_char
+        Standard_Deviation c new_name population -> Standard_Deviation (resolve_numeric c) new_name population
+        Concatenate c new_name separator prefix suffix quote_char -> Concatenate (resolve_text c) new_name separator prefix suffix quote_char
         First c new_name ignore_nothing order_by -> First (resolve c) new_name ignore_nothing (resolve_order_by order_by)
         Last c new_name ignore_nothing order_by -> Last (resolve c) new_name ignore_nothing (resolve_order_by order_by)
         Maximum c new_name -> Maximum (resolve c) new_name
         Minimum c new_name -> Minimum (resolve c) new_name
-        Shortest c new_name -> Shortest (resolve c) new_name
-        Longest c new_name -> Longest (resolve c) new_name
+        Shortest c new_name -> Shortest (resolve_text c) new_name
+        Longest c new_name -> Longest (resolve_text c) new_name
 
     ## Downgrade the `Internal_Missing_Column_Error` error into a `Nothing`
        value, keeping any other dataflow errors intact.

diff --git a/distribution/lib/Standard/Test/0.0.0-dev/src/Problems.enso b/distribution/lib/Standard/Test/0.0.0-dev/src/Problems.enso
@@ -91,7 +91,7 @@ expect_warning expected_warning result =
     found = warnings.find if_missing=Nothing x->
         (x == expected_warning) || (x.is_a expected_warning)
     if found.is_nothing then
-        loc = Meta.get_source_location 3
+        loc = Meta.get_source_location 2
         Test.fail "Expected the result to contain a warning: "+expected_warning.to_text+", but it did not. The warnings were "+warnings.short_display_text+' (at '+loc+').'
 
 ## UNSTABLE

diff --git a/std-bits/table/src/main/java/org/enso/table/data/index/MultiValueIndex.java b/std-bits/table/src/main/java/org/enso/table/data/index/MultiValueIndex.java
@@ -215,11 +215,12 @@ public Table makeCrossTabTable(
     }
 
     // Merge Problems
-    AggregatedProblems[] problems = new AggregatedProblems[aggregates.length + 2];
+    AggregatedProblems[] problems = new AggregatedProblems[aggregates.length + 3];
     problems[0] = this.problems;
     problems[1] = AggregatedProblems.of(outputTableNameDeduplicator.getProblems());
+    problems[2] = nameIndex.getProblems();
     for (int i = 0; i < aggregates.length; i++) {
-      problems[i + 2] = aggregates[i].getProblems();
+      problems[i + 3] = aggregates[i].getProblems();
     }
     AggregatedProblems merged = AggregatedProblems.merge(problems);