diff --git a/.github/workflows/scala.yml b/.github/workflows/scala.yml index 0dc78837f868..bd6045293020 100644 --- a/.github/workflows/scala.yml +++ b/.github/workflows/scala.yml @@ -286,6 +286,7 @@ jobs: $ENGINE_DIST_DIR/bin/enso --no-ir-caches --run test/Geo_Tests $ENGINE_DIST_DIR/bin/enso --no-ir-caches --run test/Visualization_Tests $ENGINE_DIST_DIR/bin/enso --no-ir-caches --run test/Image_Tests + $ENGINE_DIST_DIR/bin/enso --no-ir-caches --run test/Examples_Tests - name: Compile the Standard Libraries (Unix) shell: bash @@ -311,6 +312,7 @@ jobs: $ENGINE_DIST_DIR/bin/enso --ir-caches --run test/Geo_Tests $ENGINE_DIST_DIR/bin/enso --ir-caches --run test/Visualization_Tests $ENGINE_DIST_DIR/bin/enso --ir-caches --run test/Image_Tests + $ENGINE_DIST_DIR/bin/enso --ir-caches --run test/Examples_Tests - name: Test Engine Distribution Without Caches (Windows) shell: bash @@ -321,6 +323,7 @@ jobs: $ENGINE_DIST_DIR/bin/enso.bat --no-ir-caches --run test/Geo_Tests $ENGINE_DIST_DIR/bin/enso.bat --no-ir-caches --run test/Visualization_Tests $ENGINE_DIST_DIR/bin/enso.bat --no-ir-caches --run test/Image_Tests + $ENGINE_DIST_DIR/bin/enso.bat --no-ir-caches --run test/Examples_Tests - name: Compile the Standard Libraries (Windows) shell: bash @@ -346,6 +349,7 @@ jobs: $ENGINE_DIST_DIR/bin/enso.bat --ir-caches --run test/Geo_Tests $ENGINE_DIST_DIR/bin/enso.bat --ir-caches --run test/Visualization_Tests $ENGINE_DIST_DIR/bin/enso.bat --ir-caches --run test/Image_Tests + $ENGINE_DIST_DIR/bin/enso.bat --ir-caches --run test/Examples_Tests # Publish - name: Compress the built artifacts for upload diff --git a/CHANGELOG.md b/CHANGELOG.md index dd9ac2c5064b..2027a0b53a2c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -139,6 +139,8 @@ API and added builders for customizing less common settings.][3516] - [Allow control of sort direction in `First` and `Last` aggregations.][3517] - [Implemented `Text.write`, replacing `File.write_text`.][3518] +- [Removed obsolete `select`, `group`, 
`sort` and related types from tables.] + [3519] [debug-shortcuts]: https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug @@ -219,6 +221,7 @@ [3516]: https://github.com/enso-org/enso/pull/3516 [3517]: https://github.com/enso-org/enso/pull/3517 [3518]: https://github.com/enso-org/enso/pull/3518 +[3519]: https://github.com/enso-org/enso/pull/3519 #### Enso Compiler diff --git a/distribution/lib/Standard/Database/0.0.0-dev/package.yaml b/distribution/lib/Standard/Database/0.0.0-dev/package.yaml index 30b6ddfeec6c..e034c09d539b 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/package.yaml +++ b/distribution/lib/Standard/Database/0.0.0-dev/package.yaml @@ -24,8 +24,8 @@ component-groups: - Standard.Base.Join: exports: - Standard.Database.Data.Table.Table.join - - Standard.Database.Data.Table.Table.group + - Standard.Database.Data.Table.Table.aggregate - Standard.Base.Transform: exports: - - Standard.Database.Data.Table.Table.sort + - Standard.Database.Data.Table.Table.order_by - Standard.Database.Data.Column.Column.to_table diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso index a64fccf30684..d0b02efc5a48 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso @@ -4,6 +4,8 @@ import Standard.Database.Data.Internal.Helpers import Standard.Database.Data.Internal.IR import Standard.Database.Data.Table import Standard.Table.Data.Column as Materialized_Column +import Standard.Table.Data.Sort_Column_Selector +import Standard.Table.Data.Sort_Column from Standard.Database.Data.Sql import Sql_Type from Standard.Database.Data.Table import Integrity_Error @@ -458,12 +460,11 @@ type Column column.sort > Example - Sorting `column` in descending order, placing missing values at the - top of the resulting column.
- column.sort order=Sort_Direction.Descending missing_last=False - sort : Sort_Direction -> Boolean -> (Any -> Any -> Ordering) | Nothing -> Column - sort order=Sort_Direction.Ascending missing_last=True = - this.to_table.sort by=this order=order missing_last=missing_last . at this.name + Sorting `column` in descending order. + column.sort order=Sort_Direction.Descending + sort : Sort_Direction -> Column + sort order=Sort_Direction.Ascending = + this.to_table.order_by (Sort_Column_Selector.By_Column [Sort_Column.Column this order]) . at this.name ## UNSTABLE @@ -637,4 +638,3 @@ lift_aggregate new_name connection expected_type expr context = new_ixes = cols.second new_ctx = IR.subquery_as_ctx subquery . set_index new_ixes Column new_name connection new_col.sql_type new_col.expression new_ctx - diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso index a642dc0a9f6e..c9133d992b57 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso @@ -17,7 +17,6 @@ import Standard.Table.Internal.Aggregate_Column_Helper from Standard.Database.Data.Column as Column_Module import Column, Aggregate_Column_Builder from Standard.Database.Data.Internal.IR import Internal_Column from Standard.Table.Data.Table import No_Such_Column_Error -from Standard.Table.Data.Order_Rule as Order_Rule_Module import Order_Rule from Standard.Table.Data.Column_Selector as Column_Selector_Module import Column_Selector, By_Index from Standard.Base.Data.Text.Text_Ordering as Text_Ordering_Module import Text_Ordering from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter @@ -356,7 +355,7 @@ type Table Since this Table is backed by an SQL database, the Table returned by the `limit` method is deterministic only if the Table has been ordered (using - the `sort` method). + the `order_by` method). 
Otherwise, no order is imposed, so the returned Table will include at most `max_rows` rows, but there are no guarantees on which rows will be @@ -365,7 +364,7 @@ type Table Table is materialized. The limit is applied at the very end, so the new Table behaves exactly as - the old one, just limitting its results when being materialized. + the old one, just limiting its results when being materialized. Specifically, applying further filters will still apply to the whole result set and the limit will be taken after applying these filters. @@ -373,7 +372,7 @@ type Table In the call below, assuming that the table of `t1` contains rows for numbers 1, 2, ..., 10, will return rows starting from 6 and not an empty result as one could expect if the limit was applied before the filters. - t1 = table.sort by='A' . limit 5 + t1 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "A"]) . limit 5 t2 = t1.where (t1.at 'A' > 5) t2.to_dataframe limit : Integer -> Table @@ -481,6 +480,28 @@ type Table descending order. table.order_by (Sort_Column_Selector.By_Index [Sort_Column.Index 1, Sort_Column.Index -7 Sort_Direction.Descending]) + > Example + Sorting `table` in ascending order by the value in column `'Quantity'`. + + table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'Quantity']) + + > Example + Sorting `table` in descending order by the value in column `'Quantity'`. + + table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'Quantity' Sort_Direction.Descending]) + + > Example + Sorting `table` in ascending order by the value in column `'Quantity'`, + using the value in column `'Rating'` for breaking ties. + + table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'Quantity', Sort_Column.Name 'Rating']) + + > Example + Sorting `table` in ascending order by the value in column `'Quantity'`, + using the value in column `'Rating'` in descending order for breaking + ties. 
+ + table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'Quantity', Sort_Column.Name 'Rating' Sort_Direction.Descending]) order_by : Sort_Column_Selector -> Text_Ordering -> Problem_Behavior -> Table order_by (columns = (Sort_Column_Selector.By_Name [(Sort_Column.Name (this.columns.at 0 . name))])) text_ordering=Text_Ordering on_problems=Report_Warning = Panic.handle_wrapped_dataflow_error <| problem_builder = Problem_Builder.new @@ -497,91 +518,6 @@ type Table new_ctx = this.context.add_orders new_order_descriptors this.updated_context new_ctx - ## UNSTABLE - - Sorts the table according to the specified rules. - - Arguments: - - by: Specifies the columns used for reordering the table. This - argument may be one of: - - a text: The text is treated as a column name. - - a column: Any column, which is an expression computed from this - table. - - an order rule: Specifies both the sorting column and additional - settings, that will take precedence over the global parameters of - this sort operation. The `column` field of the rule may be a text - or a column, with the semantics described above. - - a vector of any of the above: This will result in a hierarchical - sorting, such that the first rule is applied first, the second is - used for breaking ties, etc. - - order: Specifies the default sort order for this operation. All the - rules specified in the `by` argument will default to this setting, - unless specified in the rule. - - missing_last: Specifies the default placement of missing values when - compared to non-missing ones. This setting may be overridden by the - particular rules of the `by` argument. Note thet this argument is - independent from `order`, i.e. missing values will always be sorted - according to this rule, ignoring the ascending / descending setting. 
- - > Example - Sorting `table` in ascending order by the value in column `'Quantity'` - table.sort by='Quantity' - - > Example - Sorting `table` in descending order by the value in column `'Quantity'`, - placing missing values at the top of the table. - table.sort by='Quantity' order=Sort_Direction.Descending missing_last=False - - > Example - Sorting `table` in ascending order by the value in column `'Quantity'`, - using the value in column `'Rating'` for breaking ties. - table.sort by=['Quantity', 'Rating'] - - > Example - Sorting `table` in ascending order by the value in column `'Quantity'`, - using the value in column `'Rating'` in descending order for breaking - ties. - table.sort by=['Quantity', Order_Rule 'Rating' (order=Sort_Direction.Descending)] - - > Example - Sorting `table` in ascending order by the value in an externally - computed column, using the value in column `'Rating'` for breaking - ties. - quality_ratio = table.at 'Rating' / table.at 'Price' - table.sort by=[quality_ratio, 'Rating'] - sort : Text | Column | Order_Rule | Vector.Vector (Text | Column | Order_Rule) -> Sort_Direction -> Boolean -> Table - sort by order=Sort_Direction.Ascending missing_last=True = Panic.recover Any <| - missing_to_ir last = case last of - True -> IR.Nulls_Last - False -> IR.Nulls_First - wrap_elem elem = - IR.Order_Descriptor (this.resolve elem . expression) order (missing_to_ir missing_last) collation=Nothing - to_ir elem = case elem of - Text -> wrap_elem elem - Column _ _ _ _ _ -> wrap_elem elem - Order_Rule elem Nothing my_order my_nulls -> - chosen_order = my_order.if_nothing order - chosen_nulls = my_nulls.if_nothing missing_last - IR.Order_Descriptor (this.resolve elem . expression) chosen_order (missing_to_ir chosen_nulls) collation=Nothing - Order_Rule _ _ _ _ -> - Error.throw <| Unsupported_Database_Operation_Error "Custom comparators are not supported in Database" - elems = Helpers.unify_vector_singleton by . 
map to_ir - new_ctx = this.context.set_orders elems - this.updated_context new_ctx - - ## UNSTABLE - - Selects a subset of columns from this table by name. - - Arguments: - - columns: The names of the columns to select from the table. - select : Vector Text -> Table - select columns = - candidates = this.internal_columns + this.context.meta_index - find_col name = candidates.find (p -> p.name == name) - selected_cols = columns.map (find_col >> .catch) . filter (c -> c.is_nothing.not) - this.updated_columns selected_cols - ## UNSTABLE Efficiently joins two tables based on either the index or a key column. @@ -675,28 +611,31 @@ type Table Table new_table_name this.connection new_columns new_ctx - ## UNSTABLE + ## ALIAS group, summarize - Returns an aggregate table resulting from grouping the elements by the - value of the specified column. + Aggregates the rows in a table using any `Group_By` entries in columns. + The columns argument specifies which additional aggregations to perform and to return. Arguments: - - by: The column names on which to group. If this is not set, the index - will be used for grouping instead. - group : Vector Text | Text | Nothing -> Aggregate_Table - group by=Nothing = Panic.recover Any <| - cols = case by of - Nothing -> - if this.context.meta_index.is_empty then Panic.throw <| Illegal_State_Error "Trying to group by an empty index." else - this.context.meta_index - _ -> - - Helpers.unify_vector_singleton by . map (this.resolve >> .as_internal) - exprs = cols.map .expression - new_ctx = this.context.set_groups exprs . set_index cols - Aggregate_Table this.name this.connection this.internal_columns new_ctx - - ## Prototype Group By function + - columns: Vector of `Aggregate_Column` specifying the aggregated table. + - on_problems: Specifies how to handle problems if they occur, reporting + them as warnings by default. + + The following problems can occur: + - If a column name is not in the input table, a `Missing_Input_Columns`. 
+ - If a column index is out of range, a `Column_Indexes_Out_Of_Range`. + - If there are no valid columns in the output table, a `No_Output_Columns`. + - If there are invalid column names in the output table, an `Invalid_Output_Column_Names`. + - If there are duplicate column names in the output table, a `Duplicate_Output_Column_Names`. + - If grouping on or computing the `Mode` on a floating point number, a `Floating_Point_Grouping`. + - If an aggregation fails, an `Invalid_Aggregation_Method`. + - If when concatenating values there is an unquoted delimiter, an `Unquoted_Delimiter`. + - If there are more than 10 issues with a single column, an `Additional_Warnings`. + + > Example + Group by the Key column, count the rows + + table.aggregate [Group_By "Key", Count Nothing] aggregate : [Aggregate_Column] -> Problem_Behavior -> Table aggregate columns (on_problems=Report_Warning) = validated = Aggregate_Column_Helper.prepare_aggregate_columns columns this @@ -980,70 +919,6 @@ type Table # TODO This should ideally be done in a streaming manner, or at least respect the row limits. this.to_dataframe.write path format on_existing_file column_mapping on_problems -## Represents a table with grouped rows. -type Aggregate_Table - - ## UNSTABLE - - Represents a table with grouped rows. - - Arguments: - - name: The name of the table. - - connection: The connection with which the table is associated. - - internal_columns: The internal representation of the table columns. - - context: The context associated with this table. - # type Aggregate_Table (name : Text) (connection : Connection) - # (internal_columns : Vector [Text, IR.Expression]) - # (context : IR.Context) - type Aggregate_Table name connection internal_columns context - - ## UNSTABLE - - Returns a vector of aggregate columns in this table. - columns : Vector.Vector - columns = this.internal_columns . map this.make_column - - ## UNSTABLE - - Returns a column containing the number of elements in each group.
- count : Column - count = - expr = IR.Operation "COUNT_ROWS" [] - # new_name connection expected_type expr context - Column_Module.lift_aggregate "count" this.connection Sql.Sql_Type.integer expr this.context - - ## UNSTABLE - - Returns an aggregate column with the given name, contained in this table. - - Arguments: - - name: The name of the aggregate column to get from the aggregate table. - at : Text -> Column ! No_Such_Column_Error - at name = - internal = this.internal_columns.find (p -> p.name == name) - this.make_column internal . map_error (_ -> No_Such_Column_Error name) - - ## PRIVATE - - Helper to create aggregate columns from internal columns. - - Arguments: - - internal: The internal column to make into an aggregate column. - make_column : Internal_Column -> Aggregate_Column_Builder - make_column internal = - Aggregate_Column_Builder internal.name this.connection internal.sql_type internal.expression this.context - - ## PRIVATE - - Helper that returns the underlying table from before grouping. 
- ungrouped : Table - ungrouped = - new_ctx = this.context.set_groups [] - new_cols = this.internal_columns.filter col-> - turned_into_index = this.context.meta_index.exists i-> i.name == col.name - turned_into_index.not - Table this.name this.connection new_cols new_ctx - type Integrity_Error ## UNSTABLE diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Main.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Main.enso index 32b6fa192167..9e30c69542b7 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Main.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Main.enso @@ -10,6 +10,3 @@ export Standard.Database.Connection.Connection from Standard.Database.Connection.Database export all import Standard.Table.Data.Table -import Standard.Table.Data.Order_Rule -from Standard.Table.Data.Table export No_Such_Column_Error -from Standard.Table.Data.Order_Rule export Order_Rule diff --git a/distribution/lib/Standard/Examples/0.0.0-dev/src/Main.enso b/distribution/lib/Standard/Examples/0.0.0-dev/src/Main.enso index 0fed3ca039fe..b838919ac333 100644 --- a/distribution/lib/Standard/Examples/0.0.0-dev/src/Main.enso +++ b/distribution/lib/Standard/Examples/0.0.0-dev/src/Main.enso @@ -278,14 +278,6 @@ transactions_table : Table.Table transactions_table = (Enso_Project.data / "food_shop_transactions.csv") . read -## An aggregate table for the relevant examples. -aggregate_table : Table.Aggregate_Table -aggregate_table = - transactions = here.transactions_table - item_names = here.inventory_table.at "item_name" - with_names = transactions.join item_names on="item_id" - with_names.group by="item_name" - ## An example regex match. 
match : Default_Engine.Match match = diff --git a/distribution/lib/Standard/Searcher/0.0.0-dev/src/Data_Science.enso b/distribution/lib/Standard/Searcher/0.0.0-dev/src/Data_Science.enso index 3e2cf4440174..743acc49190d 100644 --- a/distribution/lib/Standard/Searcher/0.0.0-dev/src/Data_Science.enso +++ b/distribution/lib/Standard/Searcher/0.0.0-dev/src/Data_Science.enso @@ -49,10 +49,12 @@ break ties in descending order. import Standard.Examples + import Standard.Table.Data.Sort_Column_Selector + import Standard.Table.Data.Sort_Column example_sort = table = Examples.inventory_table - table.sort by=["total_stock", "sold_stock"] order=Sort_Direction.Descending + table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "total_stock", Sort_Column.Name "sold_stock" Sort_Direction.Descending]) > Example Compute the number of transactions that each item has participated in, as diff --git a/distribution/lib/Standard/Searcher/0.0.0-dev/src/Data_Science/Preparation.enso b/distribution/lib/Standard/Searcher/0.0.0-dev/src/Data_Science/Preparation.enso index e03e572a4ab7..4e2778e5c45e 100644 --- a/distribution/lib/Standard/Searcher/0.0.0-dev/src/Data_Science/Preparation.enso +++ b/distribution/lib/Standard/Searcher/0.0.0-dev/src/Data_Science/Preparation.enso @@ -11,9 +11,10 @@ Get the item name and price columns from the shop inventory. import Standard.Examples + from Standard.Table.Data.Column_Selector as Column_Selector_Module import By_Name example_select = - Examples.inventory_table.select ["item_name", "price"] + Examples.inventory_table.select_columns (By_Name ["item_name", "price"]) > Example Remove any rows that contain missing values from the table. 
diff --git a/distribution/lib/Standard/Searcher/0.0.0-dev/src/Data_Science/Transform.enso b/distribution/lib/Standard/Searcher/0.0.0-dev/src/Data_Science/Transform.enso index 94653376e564..6c502b14690c 100644 --- a/distribution/lib/Standard/Searcher/0.0.0-dev/src/Data_Science/Transform.enso +++ b/distribution/lib/Standard/Searcher/0.0.0-dev/src/Data_Science/Transform.enso @@ -24,14 +24,15 @@ example_map = Examples.integer_column.map (x -> x * x) > Example - Sort the shop inventory based on the per-item price in descending order and - placing missing values at the top of the table. + Sort the shop inventory based on the per-item price in descending order. import Standard.Examples + import Standard.Table.Data.Sort_Column_Selector + import Standard.Table.Data.Sort_Column example_sort = table = Examples.inventory_table - table.sort by="price" order=Sort_Direction.Descending missing_last=false + table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "price" Sort_Direction.Descending]) > Example Add two columns to each other. 
diff --git a/distribution/lib/Standard/Table/0.0.0-dev/package.yaml b/distribution/lib/Standard/Table/0.0.0-dev/package.yaml index 9d9858ef5cfa..04f37e22703c 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/package.yaml +++ b/distribution/lib/Standard/Table/0.0.0-dev/package.yaml @@ -29,10 +29,10 @@ component-groups: - Standard.Base.Join: exports: - Standard.Table.Data.Table.Table.join - - Standard.Table.Data.Table.Table.group + - Standard.Table.Data.Table.Table.aggregate - Standard.Base.Transform: exports: - - Standard.Table.Data.Table.Table.sort + - Standard.Table.Data.Table.Table.order_by - Standard.Table.Data.Table.Table.to_csv - Standard.Table.Data.Column.Column.to_table - Standard.Base.Output: diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso index 7397dd967392..31a46dc0242d 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso @@ -996,14 +996,15 @@ type Column Examples.decimal_column.sort comparator=my_comparator sort : Sort_Direction -> Boolean -> (Any -> Any -> Ordering) | Nothing -> Column sort order=Sort_Direction.Ascending missing_last=True comparator=Nothing = + comparator_to_java cmp x y = cmp x y . 
to_sign order_bool = case order of Sort_Direction.Ascending -> True Sort_Direction.Descending -> False java_cmp = case comparator of Nothing -> Nothing - cmp -> Table.comparator_to_java cmp + cmp -> comparator_to_java cmp rule = OrderBuilder.OrderRule.new this.java_column java_cmp order_bool missing_last - fallback_cmp = Table.comparator_to_java .compare_to + fallback_cmp = comparator_to_java .compare_to mask = OrderBuilder.buildOrderMask [rule].to_array fallback_cmp new_col = this.java_column.applyMask mask Column new_col diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Order_Rule.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Order_Rule.enso deleted file mode 100644 index aaaee19f0a1c..000000000000 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Order_Rule.enso +++ /dev/null @@ -1,31 +0,0 @@ -from Standard.Base import all - -type Order_Rule - - ## UNSTABLE - - A rule used for sorting table-like structures. - - Arguments: - - column: a value representing the data dimension by which this rule is - sorting. This type does not specify the underlying representation of a - column, assuming that the sorting engine defines its own column - representation. - - comparator: a function taking two elements of the data being sorted on - and returning an `Ordering`. The function may be `Nothing`, in which - case a natural ordering will be used. Note that certain table backends - (such us database connectors) may not support this field being set to a - non-`Nothing` value. - - order: specifies whether the table should be sorted in an ascending or - descending order. The default value of `Nothing` delegates the decision - to the sorting function. Can be set to `Sort_Direction.Ascending` or - `Sort_Direction.Descending` from the `Base` library, to specify the - ordering. - - missing_last: whether the missing values should be placed at the - beginning or end of the sorted table. Note that this argument is - independent from `order`, i.e. 
missing values will always be sorted - according to this rule, ignoring the ascending / descending setting. - The default value of `Nothing` delegates the decision to the sorting - function. - type Order_Rule column comparator=Nothing order=Nothing missing_last=Nothing - diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso index b0df9a620136..e364eef5f7b0 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso @@ -14,7 +14,6 @@ import Standard.Table.Internal.Parse_Values_Helper import Standard.Table.Internal.Delimited_Reader import Standard.Table.Internal.Problem_Builder -from Standard.Table.Data.Order_Rule as Order_Rule_Module import Order_Rule from Standard.Table.Data.Column_Selector as Column_Selector_Module import Column_Selector, By_Index from Standard.Table.Data.Column_Type_Selection as Column_Type_Selection_Module import Column_Type_Selection, Auto from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter @@ -32,9 +31,9 @@ import Standard.Base.Data.Ordering.Comparator polyglot java import org.enso.table.data.table.Table as Java_Table polyglot java import org.enso.table.data.table.Column as Java_Column -polyglot java import org.enso.table.operations.OrderBuilder polyglot java import org.enso.table.format.csv.Writer as Csv_Writer polyglot java import org.enso.table.format.xlsx.Writer as Spreadsheet_Writer +polyglot java import org.enso.table.operations.OrderBuilder polyglot java import java.io.StringReader ## Creates a new table from a vector of `[name, items]` pairs. @@ -498,7 +497,6 @@ type Table new_names = this.columns.map mapper this.take_end (this.length - 1) . rename_columns (Column_Mapping.By_Position new_names) on_problems=on_problems - ## ALIAS group, summarize Aggregates the rows in a table using any `Group_By` entries in columns. 
@@ -572,6 +570,44 @@ type Table descending order. table.order_by (Sort_Column_Selector.By_Index [Sort_Column.Index 1, Sort_Column.Index -7 Sort_Direction.Descending]) + > Example + Sorting the shop inventory based on the per-item price in ascending + order. + + import Standard.Examples + + example_sort = Examples.inventory_table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "price"]) + + > Example + Sort the shop inventory based on the per-item price in descending order + + import Standard.Examples + + example_sort = + table = Examples.inventory_table + table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "price" Sort_Direction.Descending]) + + > Example + Sort the shop inventory based on the total stock, using the number sold + to break ties in descending order. + + import Standard.Examples + + example_sort = + table = Examples.inventory_table + table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "total_stock" Sort_Direction.Descending, Sort_Column.Name "sold_stock" Sort_Direction.Descending]) + + > Example + Sort the shop inventory in ascending order by the total stock, using + the number of items sold in descending order to break ties. + + import Standard.Examples + import Standard.Table + + example_sort = + table = Examples.inventory_table + table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "total_stock", Sort_Column.Name "sold_stock" Sort_Direction.Descending]) + order_by : Sort_Column_Selector -> Text_Ordering -> Problem_Behavior -> Table order_by (columns = (Sort_Column_Selector.By_Name [(Sort_Column.Name (this.columns.at 0 . name))])) text_ordering=Text_Ordering on_problems=Report_Warning = problem_builder = Problem_Builder.new @@ -749,20 +785,6 @@ type Table Nothing -> Error.throw No_Index_Set_Error i -> Column.Column i - ## Alias Select Columns - - Selects a subset of columns from this table by name. - - > Example - Get the item name and price columns from the shop inventory. 
- - import Standard.Examples - - example_select = - Examples.inventory_table.select ["item_name", "price"] - select : Vector -> Table - select columns = Table (this.java_table.selectColumns columns.to_array) - ## ALIAS Join Table Efficiently joins two tables based on either the index or the specified @@ -876,183 +898,6 @@ type Table cols = this.columns here.new [["Column", cols.map .name], ["Items Count", cols.map .count], ["Storage Type", cols.map .storage_type]] . set_index "Column" - ## ALIAS Group a Table - - Returns an aggregate table resulting from grouping the elements by the - value of the specified column. - - Arguments: - - by: The column in the table to perform grouping by. If this argument - is not set, the index is used for grouping instead. - - > Example - Compute the number of transactions that each item has participated in, - as well as the number of each item sold across those transactions. - - import Standard.Examples - import Standard.Table - - example_group = - transactions = Examples.transactions_table - item_names = Examples.inventory_table.at "item_name" - aggregated = transactions.group by="item_id" - num_transactions = aggregated.at "transaction_id" . reduce .length . rename "transaction_count" - num_sold = aggregated.at "quantity" . reduce .sum . rename "num_sold" - Table.join [item_names, num_transactions, num_sold] - group : Text | Nothing -> Aggregate_Table - group by=Nothing = - Aggregate_Table (this.java_table.group by) - - ## ALIAS Sort Table - UNSTABLE - - Sorts the table according to the specified rules. - - Arguments: - - by: Specifies the columns used for reordering the table. This argument - may be one of: - - a text: The text is treated as a column name. - - a column: Any column, that may or may not belong to this table. - Sorting by a column will result in reordering the rows of this - table in a way that would result in sorting the given column. 
- - an order rule: Specifies both the sorting column and additional - settings, that will take precedence over the global parameters of - this sort operation. The `column` field of the rule may be a text - or a column, with the semantics described above. - - a vector of any of the above: This will result in a hierarchical - sorting, such that the first rule is applied first, the second is - used for breaking ties, etc. - - order: Specifies the default sort order for this operation. All the - rules specified in the `by` argument will default to this setting, - unless specified in the rule. - - missing_last: Specifies the default placement of missing values when - compared to non-missing ones. This setting may be overriden by the - particular rules of the `by` argument. Note thet this argument is - independent from `order`, i.e. missing values will always be sorted - according to this rule, ignoring the ascending / descending setting. - - > Example - Sorting the shop inventory based on the per-item price in ascending - order. - - import Standard.Examples - - example_sort = Examples.inventory_table.sort by="price" - - > Example - Sort the shop inventory based on the per-item price in descending order - and placing missing values at the top of the table. - - import Standard.Examples - - example_sort = - table = Examples.inventory_table - table.sort by="price" order=Sort_Direction.Descending missing_last=false - - > Example - Sort the shop inventory based on the total stock, using the number sold - to break ties in descending order. - - import Standard.Examples - - example_sort = - table = Examples.inventory_table - table.sort by=["total_stock", "sold_stock"] order=Sort_Direction.Descending - - > Example - Sort the shop inventory in ascending order by the total stock, using - the number of items sold in descending order to break ties. 
- - import Standard.Examples - import Standard.Table - - example_sort = - table = Examples.inventory_table - sold_stock_rule = Table.Order_Rule "sold_stock" order=Sort_Direction.Descending - table.sort by=["total_stock", sold_stock_rule] - - > Example - Sorting the inventory in descending order based on the percentage of - the total stock sold, using the popularity of the product to break - ties. - - import Standard.Examples - import Standard.Table - - example_sort = - table = Examples.inventory_table.join Examples.popularity_table - percentage_sold = table.at "sold_stock" / table.at "total_stock" - table.sort by=[percentage_sold, "popularity"] order=Sort_Direction.Descending - - > Example - Sort the inventory by the price using a custom comparator function. - - import Standard.Examples - import Standard.Table - - example_sort = - table = Examples.inventory_table - comparator a b = a.compare_to b*2 - price_rule = Table.Order_Rule "price" comparator=comparator - table.sort by=price_rule - sort : Text | Column.Column | Order_Rule | Vector.Vector (Text | Column.Column | Order_Rule) -> Sort_Direction -> Boolean -> Table - sort by order=Sort_Direction.Ascending missing_last=True = Panic.recover Any <| - rules = this.build_java_order_rules by order missing_last - fallback_cmp = here.comparator_to_java .compare_to - mask = OrderBuilder.buildOrderMask rules.to_array fallback_cmp - new_table = this.java_table.applyMask mask - Table new_table - - ## PRIVATE - - Transforms order rules from Enso into Java. - - Arguments: - - rules: The rule(s) to convert. - - order: The sorting order. - - missing_last: Whether or not missing values should be ordered last. - build_java_order_rules : (Text | Column.Column. 
| Order_Rule | Vector (Text | Column.Column | Order_Rule)) -> Sort_Direction -> Boolean -> Vector - build_java_order_rules rules order missing_last = case rules of - Text -> [this.build_java_order_rule rules order missing_last] - Column.Column _ -> [this.build_java_order_rule rules order missing_last] - Order_Rule _ _ _ _ -> [this.build_java_order_rule rules order missing_last] - Vector.Vector _ -> rules.map (this.build_java_order_rule _ order missing_last) - - ## PRIVATE - - Builds a java order rule. - - Arguments: - - rule: The rule to convert. - - order: The sort order. - - missing_last: Whether or not missing values should be ordered last. - build_java_order_rule : (Text | Column.Column | Order_Rule) -> Sort_Direction -> Boolean -> OrderRule - build_java_order_rule rule order missing_last = - order_bool = case order of - Sort_Direction.Ascending -> True - Sort_Direction.Descending -> False - case rule of - Text -> - column = Panic.rethrow (this.at rule) - OrderBuilder.OrderRule.new column.java_column Nothing order_bool missing_last - Column.Column c -> - OrderBuilder.OrderRule.new c Nothing order_bool missing_last - Order_Rule col_ref cmp rule_order rule_nulls_last -> - c = case col_ref of - Text -> this.at col_ref . java_column - Column.Column c -> c - o = case rule_order of - Nothing -> order_bool - Sort_Direction.Ascending -> True - Sort_Direction.Descending -> False - nulls = case rule_nulls_last of - Nothing -> missing_last - _ -> rule_nulls_last - java_cmp = case cmp of - Nothing -> Nothing - c -> here.comparator_to_java c - OrderBuilder.OrderRule.new c java_cmp o nulls - ## UNSTABLE Concatenates `other` to `this`. @@ -1414,86 +1259,6 @@ Text.write_to_spreadsheet cell = cell.setCellValue this which should be set by this method. Date.write_to_spreadsheet cell = cell.setCellValue this.internal_local_date - - -## Represents a table with grouped rows. -type Aggregate_Table - - ## PRIVATE - - A table type with grouped rows. 
- - Arguments: - - java_table: The internal representation of the table. - type Aggregate_Table java_table - - ## Returns a vector of aggregate columns in this table. - - > Example - Get a vector of aggregate columns from this table. - - import Standard.Examples - - example_columns = Examples.aggregate_table.columns - columns : Vector.Vector - columns = Vector.Vector this.java_table.getColumns . map Column.Aggregate_Column - - ## Returns a table containing columns resulting from calling `values` on - each column in `this`. - - > Example - Get the values table from an aggregate table. - - import Standard.Examples - - example_values = Examples.aggregate_table.values - values : Table - values = this.columns . map (_.values name_suffix='') . reduce .join - - ## Returns a column containing the number of elements in each group of the - aggregate table. - - > Examples - Get the counts for an aggregate table. - - import Standard.Examples - - example_count = Examples.aggregate_table.count - count : Column - count = Column.Column this.java_table.count - - ## ALIAS Get a Column - - Returns an aggregate column with the given name, contained in this table. - - Arguments: - - name: The name of the aggregate column to get. - - > Example - Get the transaction ids column from the aggregate table. - - import Standard.Examples - - example_at = Examples.aggregate_table.at "transaction_id" - at : Text -> Column ! No_Such_Column_Error - at name = case this.java_table.getColumnByName name of - Nothing -> Error.throw (No_Such_Column_Error name) - c -> Column.Aggregate_Column c - - ## Prints an ASCII-art table with this data to the standard output. - - Arguments: - - show_rows: the number of initial rows that should be displayed. - - > Example - Pretty-print and display an aggregate table in the console. 
- - import Standard.Examples - - example_print = Examples.aggregate_table.print - print : Integer -> Nothing - print show_rows=10 = this.values.print show_rows - ## UNSTABLE An error returned when a non-existent column is being looked up. @@ -1588,17 +1353,6 @@ print_table header rows indices_count format_term = " " + y ([" " + header_line, divider] + row_lines).join '\n' -## PRIVATE - - Wraps the Enso comparator function so it's usable in Java. - - Arguments: - - cmp: The Enso comparator function. - - x: The left operand to the comparator. - - y: The right operand to the comparator. -comparator_to_java : (Any -> Any -> Ordering) -> Any -> Any -> Integer -comparator_to_java cmp x y = cmp x y . to_sign - Table.from (that : Text) (format:File_Format.Delimited|File_Format.Fixed_Width = File_Format.Delimited '\t') (on_problems:Problem_Behavior=Report_Warning) = java_reader = StringReader.new that Delimited_Reader.read_from_reader format java_reader on_problems diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso index ea4e59eee68e..0223cda69385 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso @@ -1,6 +1,5 @@ from Standard.Base import all -from Standard.Table.Data.Table as Table_Module import No_Such_Column_Error from Standard.Table.Data.Column as Column_Module import Column from Standard.Table.Data.Aggregate_Column import all from Standard.Table.Data.Column_Selector as Column_Selector_Module import Column_Selector, By_Name, By_Index, By_Column diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Main.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Main.enso index c2b9609d60a7..dbd6a2072f75 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Main.enso +++ 
b/distribution/lib/Standard/Table/0.0.0-dev/src/Main.enso @@ -7,7 +7,6 @@ import Standard.Table.Io.Spreadsheet import Standard.Table.Io.Spreadsheet_Write_Mode import Standard.Table.Data.Table import Standard.Table.Data.Column -import Standard.Table.Data.Order_Rule import Standard.Table.Model from Standard.Table.Io.Excel export Excel_Section, Excel_Range, read_excel @@ -19,7 +18,6 @@ export Standard.Table.Model export Standard.Table.Io.File_Read from Standard.Table.Data.Table export new, from_rows, join, concat, No_Such_Column_Error, Table -from Standard.Table.Data.Order_Rule export Order_Rule ## ALIAS To Table diff --git a/distribution/lib/Standard/Visualization/0.0.0-dev/src/Table/Visualization.enso b/distribution/lib/Standard/Visualization/0.0.0-dev/src/Table/Visualization.enso index e49840fd1546..86f427d90605 100644 --- a/distribution/lib/Standard/Visualization/0.0.0-dev/src/Table/Visualization.enso +++ b/distribution/lib/Standard/Visualization/0.0.0-dev/src/Table/Visualization.enso @@ -1,4 +1,5 @@ from Standard.Base import all +from Standard.Table.Data.Column_Selector as Column_Selector_Module import By_Name import Standard.Table.Data.Table as Dataframe_Table import Standard.Table.Data.Column as Dataframe_Column @@ -31,8 +32,8 @@ prepare_visualization x max_rows = Helpers.recover_errors <| case x of # Materialize a table with indices as normal columns (because dataframe does not support multi-indexing). df = x.reset_index.to_dataframe max_rows # Then split into actual columns and indices. - vis_df = df.select (x.columns.map .name) - indices = df.select (x.indices.map .name) . columns + vis_df = df.select_columns (By_Name (x.columns.map .name)) + indices = df.select_columns (By_Name (x.indices.map .name)) . 
columns all_rows_count = x.row_count here.make_json vis_df indices all_rows_count @@ -43,14 +44,9 @@ prepare_visualization x max_rows = Helpers.recover_errors <| case x of here.prepare_visualization x.to_table max_rows # We display aggregates as their ungrouped counterparts. - Dataframe_Table.Aggregate_Table _ -> - ungrouped = Dataframe_Table.Table x.java_table.getUnderlyingTable - here.prepare_visualization ungrouped max_rows Dataframe_Column.Aggregate_Column _ -> ungrouped = Dataframe_Column.Column x.java_column.getColumn here.prepare_visualization ungrouped.to_table max_rows - Database_Table.Aggregate_Table _ _ _ _ -> - here.prepare_visualization x.ungrouped max_rows Database_Column.Aggregate_Column_Builder _ _ _ _ _ -> here.prepare_visualization x.ungrouped.to_table max_rows diff --git a/std-bits/table/src/main/java/org/enso/table/data/table/Table.java b/std-bits/table/src/main/java/org/enso/table/data/table/Table.java index 880b446498d2..ae1e5155d5c2 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/table/Table.java +++ b/std-bits/table/src/main/java/org/enso/table/data/table/Table.java @@ -10,7 +10,6 @@ import org.enso.table.data.index.Index; import org.enso.table.data.index.MultiValueIndex; import org.enso.table.data.mask.OrderMask; -import org.enso.table.data.table.aggregate.AggregateTable; import org.enso.table.data.table.problems.AggregatedProblems; import org.enso.table.error.NoSuchColumnException; import org.enso.table.error.UnexpectedColumnTypeException; @@ -468,11 +467,6 @@ private Table hconcat(Table other, String lsuffix, String rsuffix) { return new Table(newColumns, index); } - public AggregateTable group(String by) { - Table t = by == null ? 
this : indexFromColumn(by); - return new AggregateTable(t); - } - /** @return a copy of the Column containing a slice of the original data */ public Table slice(int offset, int limit) { Column[] newColumns = new Column[columns.length]; diff --git a/std-bits/table/src/main/java/org/enso/table/data/table/aggregate/AggregateColumn.java b/std-bits/table/src/main/java/org/enso/table/data/table/aggregate/AggregateColumn.java deleted file mode 100644 index 8aa00e26e62f..000000000000 --- a/std-bits/table/src/main/java/org/enso/table/data/table/aggregate/AggregateColumn.java +++ /dev/null @@ -1,58 +0,0 @@ -package org.enso.table.data.table.aggregate; - -import org.enso.table.data.column.operation.aggregate.Aggregator; -import org.enso.table.data.index.Index; -import org.enso.table.data.table.Column; - -import java.util.List; -import java.util.function.Function; -import java.util.stream.IntStream; - -/** A column wrapper used for aggregation operations. */ -public class AggregateColumn { - private final Index uniqueIndex; - private final Column column; - - /** - * Creates a new column - * - * @param uniqueIndex the unique index obtained from the column's index - * @param column the wrapped column - */ - public AggregateColumn(Index uniqueIndex, Column column) { - this.uniqueIndex = uniqueIndex; - this.column = column; - } - - /** - * Aggregates the groups using a given aggregation operation. - * - * @param aggName name of a vectorized operation that can be used if possible. If null is passed, - * this parameter is unused. - * @param outSuffix a string appended to the name of the resulting column. - * @param aggregatorFunction the function to use if a vectorized operation is not available. - * @param skipNa whether missing values should be passed to the {@code fallback} function. - * @return a column indexed by the unique index of this aggregate, storing results of applying the - * specified operation. 
- */ - public Column aggregate( - String aggName, - String outSuffix, - Function, Object> aggregatorFunction, - boolean skipNa) { - Aggregator aggregator = - column.getStorage().getAggregator(aggName, aggregatorFunction, skipNa, uniqueIndex.size()); - - for (int i = 0; i < uniqueIndex.size(); i++) { - IntStream ixes = - column.getIndex().loc(uniqueIndex.iloc(i)).stream().mapToInt(Integer::intValue); - aggregator.nextGroup(ixes); - } - return new Column(column.getName() + outSuffix, uniqueIndex, aggregator.seal()); - } - - /** @return the underlying (ungrouped) column. */ - public Column getColumn() { - return column; - } -} diff --git a/std-bits/table/src/main/java/org/enso/table/data/table/aggregate/AggregateTable.java b/std-bits/table/src/main/java/org/enso/table/data/table/aggregate/AggregateTable.java deleted file mode 100644 index 229345013bcc..000000000000 --- a/std-bits/table/src/main/java/org/enso/table/data/table/aggregate/AggregateTable.java +++ /dev/null @@ -1,59 +0,0 @@ -package org.enso.table.data.table.aggregate; - -import org.enso.table.data.column.storage.LongStorage; -import org.enso.table.data.index.Index; -import org.enso.table.data.table.Column; -import org.enso.table.data.table.Table; - -import java.util.Arrays; -import java.util.List; - -/** Represents a table grouped by a given index. */ -public class AggregateTable { - private final Table table; - private final Index uniqueIndex; - - /** @param table the underlying table */ - public AggregateTable(Table table) { - this.table = table; - this.uniqueIndex = table.getIndex().unique(); - } - - /** @return a column containing group sizes in this aggregate. */ - public Column count() { - long[] counts = new long[uniqueIndex.size()]; - for (int i = 0; i < uniqueIndex.size(); i++) { - List items = table.getIndex().loc(uniqueIndex.iloc(i)); - counts[i] = items == null ? 
0 : items.size(); - } - LongStorage storage = new LongStorage(counts); - return new Column("count", uniqueIndex, storage); - } - - /** - * Returns a column with the given name. - * - * @param n the column name - * @return column with the given name or null if does not exist - */ - public AggregateColumn getColumnByName(String n) { - Column c = table.getColumnByName(n); - if (c == null) { - return null; - } else { - return new AggregateColumn(uniqueIndex, c); - } - } - - /** @return Aggregate columns contained in this table. */ - public AggregateColumn[] getColumns() { - return Arrays.stream(table.getColumns()) - .map(c -> new AggregateColumn(uniqueIndex, c)) - .toArray(AggregateColumn[]::new); - } - - /** @return the underlying (ungrouped) table. */ - public Table getUnderlyingTable() { - return table; - } -} diff --git a/test/Examples_Tests/README.md b/test/Examples_Tests/README.md new file mode 100644 index 000000000000..bf731920bf64 --- /dev/null +++ b/test/Examples_Tests/README.md @@ -0,0 +1 @@ +This is a set of tests for the `Examples` library for Enso. 
diff --git a/test/Examples_Tests/package.yaml b/test/Examples_Tests/package.yaml new file mode 100644 index 000000000000..4e8c3968b7b1 --- /dev/null +++ b/test/Examples_Tests/package.yaml @@ -0,0 +1,7 @@ +name: Tests +namespace: enso_dev +enso-version: default +version: 0.0.1 +license: MIT +author: enso-dev@enso.org +maintainer: enso-dev@enso.org diff --git a/test/Tests/src/Examples_Spec.enso b/test/Examples_Tests/src/Examples_Spec.enso similarity index 95% rename from test/Tests/src/Examples_Spec.enso rename to test/Examples_Tests/src/Examples_Spec.enso index 34acbc2125bd..4ca74f59765c 100644 --- a/test/Tests/src/Examples_Spec.enso +++ b/test/Examples_Tests/src/Examples_Spec.enso @@ -112,17 +112,11 @@ spec = Test.group "Examples" <| Examples.text_column_1 Examples.text_column_2 - Test.specify "should provide an aggregate column" <| - Examples.aggregate_column - Test.specify "should provide various example tables" <| Examples.inventory_table Examples.popularity_table Examples.transactions_table - Test.specify "should provide an aggregate table" <| - Examples.aggregate_table - Test.specify "should provide an example of a regex match" <| match = Examples.match match.groups.length . 
should_equal 5 diff --git a/test/Examples_Tests/src/Main.enso b/test/Examples_Tests/src/Main.enso new file mode 100644 index 000000000000..d696586cc802 --- /dev/null +++ b/test/Examples_Tests/src/Main.enso @@ -0,0 +1,8 @@ +from Standard.Base import all + +import Standard.Test + +import project.Examples_Spec + +main = Test.Suite.run_main <| + Examples_Spec.spec diff --git a/test/Table_Tests/src/Aggregate_Spec.enso b/test/Table_Tests/src/Aggregate_Spec.enso index 44af889f006e..9fa97d421c56 100644 --- a/test/Table_Tests/src/Aggregate_Spec.enso +++ b/test/Table_Tests/src/Aggregate_Spec.enso @@ -830,7 +830,7 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te table = table_builder [["A", ["foo", "bar", "foo", "foo"]], ["B", ["a", "b", "c", "d"]]] result = table.aggregate [Group_By "A", (Concatenate "B" prefix="[[" suffix="]]" separator="; ")] result.row_count . should_equal 2 - materialized = materialize result . sort "A" + materialized = materialize result . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "A"]) materialized.columns.length . should_equal 2 materialized.columns.at 0 . name . should_equal "A" materialized.columns.at 0 . to_vector . should_equal ["bar", "foo"] @@ -910,14 +910,14 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te r1 = t5.aggregate [Group_By "G", Count_Distinct "A" (ignore_nothing=True)] r1.row_count . should_equal 2 - m1 = materialize r1 . sort "G" + m1 = materialize r1 . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "G"]) m1.columns.length . should_equal 2 m1.columns.first.to_vector . should_equal ["bar", "foo"] m1.columns.second.to_vector . should_equal [0, 1] r2 = t5.aggregate [Group_By "G", Count_Distinct "A" (ignore_nothing=False)] r2.row_count . should_equal 2 - m2 = materialize r2 . sort "G" + m2 = materialize r2 . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "G"]) m2.columns.length . should_equal 2 m2.columns.first.to_vector . 
should_equal ["bar", "foo"] m2.columns.second.to_vector . should_equal [1, 2] @@ -959,7 +959,7 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te r2 = table.aggregate [Group_By "G", Average "X"] r2.row_count.should_equal 2 - m2 = materialize r2 . sort "G" + m2 = materialize r2 . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "G"]) m2.columns.length . should_equal 2 m2.columns.first.to_vector . should_equal ["a", "b"] m2.columns.second.to_vector . should_equal [0.5, 1] @@ -1145,7 +1145,7 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te table = table_builder [["A", [1, 1, 2, 1]], ["B", [3, 2, 2, 3]], ["C", [11, 12, 13, 14]]] grouped = table.aggregate [Group_By "B", Group_By "A"] grouped.row_count . should_equal 3 - materialized = materialize grouped . sort ["A", "B"] + materialized = materialize grouped . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "A", Sort_Column.Name "B"]) materialized.columns.length . should_equal 2 materialized.columns.at 1 . name . should_equal "A" materialized.columns.at 1 . to_vector . 
should_equal [1, 1, 2] diff --git a/test/Table_Tests/src/Database/Codegen_Spec.enso b/test/Table_Tests/src/Database/Codegen_Spec.enso index 4da2c8f66dc8..b9e2c3b77b4c 100644 --- a/test/Table_Tests/src/Database/Codegen_Spec.enso +++ b/test/Table_Tests/src/Database/Codegen_Spec.enso @@ -1,14 +1,19 @@ from Standard.Base import all +from Standard.Table.Data.Column_Selector as Column_Selector_Module import By_Name +import Standard.Table.Data.Sort_Column_Selector +import Standard.Table.Data.Sort_Column import project.Database.Helpers.Fake_Test_Connection import Standard.Database.Data.Dialect import Standard.Database.Data.Table as Table_Module import Standard.Test +import Standard.Test.Problems from Standard.Table.Data.Aggregate_Column import all from Standard.Database import all from Standard.Database.Data.Sql import Sql_Type -from Standard.Table import No_Such_Column_Error, Order_Rule +from Standard.Table import No_Such_Column_Error +from Standard.Table.Error as Table_Errors import No_Input_Columns_Selected, Missing_Input_Columns from Standard.Database.Error as Database_Errors import Unsupported_Database_Operation_Error spec = @@ -38,7 +43,7 @@ spec = Test.group "[Codegen] Basic Select" <| Test.specify "should select columns from a table" <| t1.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1"', []] - t2 = t1.select ["C", "B", "undefined"] + t2 = t1.select_columns (By_Name ["C", "B", "undefined"]) reorder=True t2.to_sql.prepare . should_equal ['SELECT "T1"."C" AS "C", "T1"."B" AS "B" FROM "T1" AS "T1"', []] foo = t1.at "A" . rename "FOO" @@ -47,7 +52,7 @@ spec = t3 = t2.set "bar" foo t3.to_sql.prepare . should_equal ['SELECT "T1"."C" AS "C", "T1"."B" AS "B", "T1"."A" AS "bar" FROM "T1" AS "T1"', []] - Test.specify "should fail if at is called for a nonexisting column" <| + Test.specify "should fail if at is called for a non-existent column" <| t1.at "undefined" . 
should_fail_with No_Such_Column_Error Test.specify "should allow to limit the amount of returned results" <| @@ -55,7 +60,7 @@ spec = t2.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" LIMIT 5', []] Test.specify "should work correctly when there are no columns" <| - empty = t1.select [] + empty = t1.select_columns (By_Name []) json = Json.from_pairs [["query", Nothing], ["message", "The table has no columns so a query cannot be generated."]] empty.to_json . should_equal json empty.columns.length . should_equal 0 @@ -146,28 +151,26 @@ spec = Test.group "[Codegen] Sorting" <| Test.specify "should allow sorting by a single column name" <| - r1 = t1.sort by="A" . at "B" - r1.to_sql.prepare . should_equal ['SELECT "T1"."B" AS "B" FROM "T1" AS "T1" ORDER BY "T1"."A" ASC NULLS LAST', []] + r1 = t1.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "A"]) . at "B" + r1.to_sql.prepare . should_equal ['SELECT "T1"."B" AS "B" FROM "T1" AS "T1" ORDER BY "T1"."A" ASC', []] - r2 = t1.sort by="B" missing_last=False order=Sort_Direction.Descending . at "A" - r2.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A" FROM "T1" AS "T1" ORDER BY "T1"."B" DESC NULLS FIRST', []] + r2 = t1.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "B" Sort_Direction.Descending]) . at "A" + r2.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A" FROM "T1" AS "T1" ORDER BY "T1"."B" DESC', []] Test.specify 'should allow sorting by multiple column names' <| - r1 = t1.sort by=['A', 'B'] - r1.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" ORDER BY "T1"."A" ASC NULLS LAST, "T1"."B" ASC NULLS LAST', []] - - Test.specify 'should allow sorting by expressions' <| - sum = t1.at 'A' + t1.at 'B' - r1 = t1.sort by=sum . at "C" - r1.to_sql.prepare . 
should_equal ['SELECT "T1"."C" AS "C" FROM "T1" AS "T1" ORDER BY ("T1"."A" + "T1"."B") ASC NULLS LAST', []] + r1 = t1.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'A', Sort_Column.Name 'B']) + r1.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" ORDER BY "T1"."A" ASC, "T1"."B" ASC', []] Test.specify 'should allow sorting with specific by-column rules' <| - r1 = t1.sort by=['A', (Order_Rule 'B' order=Sort_Direction.Descending)] - r1.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" ORDER BY "T1"."A" ASC NULLS LAST, "T1"."B" DESC NULLS LAST', []] - - Test.specify 'should return dataflow error when passed a non-existent column' <| - r = t1.sort by='foobar' - r.should_fail_with No_Such_Column_Error + r1 = t1.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "A", Sort_Column.Name "B" Sort_Direction.Descending]) + r1.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" ORDER BY "T1"."A" ASC, "T1"."B" DESC', []] + + Test.specify 'should return warnings and errors when passed a non-existent column' <| + action = t1.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'foobar']) on_problems=_ + tester table = + table.to_sql.prepare . 
should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1"', []] + problems = [Missing_Input_Columns [Sort_Column.Name 'foobar'], No_Input_Columns_Selected] + Problems.test_problem_handling action problems tester Test.group "Helpers" <| Test.specify "combine_names should combine lists of names" <| diff --git a/test/Table_Tests/src/Database/Common_Spec.enso b/test/Table_Tests/src/Database/Common_Spec.enso index 7395898473e8..ed25a84d9a99 100644 --- a/test/Table_Tests/src/Database/Common_Spec.enso +++ b/test/Table_Tests/src/Database/Common_Spec.enso @@ -1,7 +1,14 @@ from Standard.Base import all +from Standard.Table.Data.Column_Selector as Column_Selector_Module import By_Name + from Standard.Database import all + import Standard.Table.Data.Table as Materialized_Table +import Standard.Table.Data.Sort_Column_Selector +import Standard.Table.Data.Sort_Column import Standard.Test +import Standard.Test.Problems +from Standard.Table.Error as Table_Errors import No_Input_Columns_Selected, Missing_Input_Columns import project.Database.Helpers.Name_Generator from Standard.Table.Data.Aggregate_Column import all @@ -46,7 +53,7 @@ spec prefix connection pending=Nothing = ix2.name . should_equal 'a' ix2.to_vector . should_equal [1, 4] Test.specify "should work correctly when there are no columns" <| - empty = t1.select [] + empty = t1.select_columns (By_Name []) empty.to_dataframe.columns.length . should_equal 0 empty.to_dataframe.row_count . should_equal empty.row_count Test.specify "should handle bigger result sets" <| @@ -125,24 +132,24 @@ spec prefix connection pending=Nothing = the Dataframes library, so it is independent of the library under testing here. Test.specify "should allow joining tables index-on-index" <| - r_1 = a.set_index 'x' . join (b.set_index 'w') . to_dataframe . sort by=['y', 'z'] + r_1 = a.set_index 'x' . join (b.set_index 'w') . to_dataframe . 
order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'y', Sort_Column.Name 'z']) r_1.at 'y' . to_vector . should_equal ['bar', 'baz', 'eggs', 'foo', 'spam', 'spam', 'spam'] r_1.at 'z' . to_vector . should_equal [Nothing, Nothing, 'foo', Nothing, 'bar', 'eggs', 'foo'] - r_2 = a.set_index 'y' . join (b.set_index 'z') drop_unmatched=True . to_dataframe . sort by=['x', 'w'] + r_2 = a.set_index 'y' . join (b.set_index 'z') drop_unmatched=True . to_dataframe . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'x', Sort_Column.Name 'w']) r_2.at 'x' . to_vector . should_equal [0, 0, 1, 1, 3, 6] r_2.at 'w' . to_vector . should_equal [3, 6, 3, 5, 5, 3] Test.specify "should allow joining tables column-on-index" <| - r_1 = a.join (b.set_index 'w') on='x' . to_dataframe . sort by=['y', 'z'] + r_1 = a.join (b.set_index 'w') on='x' . to_dataframe . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'y', Sort_Column.Name 'z']) r_1.at 'y' . to_vector . should_equal ['bar', 'baz', 'eggs', 'foo', 'spam', 'spam', 'spam'] r_1.at 'z' . to_vector . should_equal [Nothing, Nothing, 'foo', Nothing, 'bar', 'eggs', 'foo'] - r_2 = a.join (b.set_index 'z') drop_unmatched=True on='y' . to_dataframe . sort by=['x', 'w'] + r_2 = a.join (b.set_index 'z') drop_unmatched=True on='y' . to_dataframe . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'x', Sort_Column.Name 'w']) r_2.at 'x' . to_vector . should_equal [0, 0, 1, 1, 3, 6] r_2.at 'w' . to_vector . should_equal [3, 6, 3, 5, 5, 3] Test.specify "should allow self-joins and append suffixes to disambiguate column names" <| - r_1 = a.join (a.set_index 'x') on='x' . to_dataframe . sort by='x' + r_1 = a.join (a.set_index 'x') on='x' . to_dataframe . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'x']) r_1.columns.map .name . should_equal ['x', 'y_left', 'y_right'] r_1.at 'x' . to_vector . 
should_equal [0, 1, 3, 6, 7] expected_y = ['foo', 'bar', 'spam', 'eggs', 'baz'] @@ -159,8 +166,8 @@ spec prefix connection pending=Nothing = ta_2 = ta.set_index "id" tb_2 = tb.set_index "id" res = (tc.join ta_2 on="id_a") . join tb_2 on="id_b" left_suffix="_a" right_suffix="_b" - sel = res.select ["name_a", "name_b"] - df = sel.to_dataframe . sort by="name_a" + sel = res.select_columns (By_Name ["name_a", "name_b"]) + df = sel.to_dataframe . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "name_a"]) df . at "name_a" . to_vector . should_equal ["Foo", "Hmm"] df . at "name_b" . to_vector . should_equal ["Bar", "Hmm"] @@ -201,41 +208,6 @@ spec prefix connection pending=Nothing = empty.columns.length . should_equal 0 empty.to_dataframe.columns.length . should_equal 0 - Test.group prefix+"Old Aggregation" pending=pending <| - t = upload "T6" <| - Materialized_Table.new [["name", ["foo", "bar", "foo", "baz", "foo", "bar", "quux"]], ["price", [0.4, 3.5, Nothing, 6.7, Nothing, 97, Nothing]], ["quantity", [10, 20, 30, 40, 50, 60, 70]]] - agg = t.group by='name' - ## A helper which makes sure that the groups are ordered according to the index, using the Table library - determinize col = - df = col.to_dataframe.to_table - df.sort by=df.index . at col.name - - Test.specify "should allow counting group sizes" <| - determinize agg.count . to_vector . should_equal [2, 1, 3, 1] - - Test.specify "should allow aggregating columns with basic arithmetic aggregators" <| - determinize (agg.at 'price' . mean) . to_vector . should_equal [50.25, 6.7, 0.4, Nothing] - determinize (agg.at 'price' . min) . to_vector . should_equal [3.5, 6.7, 0.4, Nothing] - determinize (agg.at 'price' . max) . to_vector . should_equal [97, 6.7, 0.4, Nothing] - - Test.specify "should allow to join multiple aggregations" <| - m1 = agg.at 'price' . mean - m2 = agg.at 'quantity' . max - df = (m1.join m2).to_dataframe - df2 = df.sort by=df.index - df2.at 'price_mean' . to_vector . 
should_equal [50.25, 6.7, 0.4, Nothing] - df2.at 'quantity_max' . to_vector . should_equal [60, 40, 50, 70] - - Test.specify "should correctly compute the result size" <| - m = agg.at 'price' . mean - m.length . should_equal m.to_vector.length - m.length . should_equal 4 - - Test.specify "should correctly count values" <| - m = agg.at 'price' . mean - m.count . should_equal 3 - m.count_missing . should_equal 1 - Test.group prefix+"Column-wide statistics" pending=pending <| Test.specify 'should allow computing basic column-wide stats' <| t7 = upload "T7" <| @@ -251,38 +223,29 @@ spec prefix connection pending=Nothing = Materialized_Table.new [["id", [1,2,3,4,5,6]], ["name", ["shoes","trousers","dress","skirt","blouse","t-shirt"]], ["quantity", [20,10,20,10,30,30]], ["rating", [3.0,Nothing,7.3,3.0,2.2,Nothing]], ["price", [37.2,42.1,64.1,87.4,13.5,64.2]]] Test.specify "should allow sorting by a single column name" <| - r_1 = df.sort by="quantity" + r_1 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'quantity']) r_1.at 'id' . to_vector . should_equal [2,4,1,3,5,6] - r_2 = df.sort by="rating" missing_last=False - r_2.at 'id' . to_vector . should_equal [2,6,5,1,4,3] - - r_3 = df.sort by="rating" missing_last=False order=Sort_Direction.Descending - r_3.at 'id' . to_vector . should_equal [2,6,3,1,4,5] + r_3 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'rating' Sort_Direction.Descending]) + r_3.at 'id' . to_vector . should_equal [3,1,4,5,2,6] Test.specify 'should allow sorting by multiple column names' <| - r_1 = df.sort by=['quantity', 'rating'] - r_1.at 'id' . to_vector . should_equal [4,2,1,3,5,6] + r_1 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'quantity', Sort_Column.Name 'rating']) + r_1.at 'id' . to_vector . should_equal [2,4,1,3,6,5] - r_2 = df.sort by=['rating', 'quantity'] missing_last=False order=Sort_Direction.Descending - r_2.at 'id' . to_vector . 
should_equal [6,2,3,1,4,5] - - Test.specify 'should allow sorting by external columns' <| - quality_ratio = df.at 'rating' / df.at 'price' - - r_1 = df.sort by=quality_ratio - r_1.at 'id' . to_vector . should_equal [4,1,3,5,2,6] - - r_2 = df.sort by=['quantity', quality_ratio] - r_2.at 'id' . to_vector . should_equal [4,2,1,3,5,6] + r_2 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'rating' Sort_Direction.Descending, Sort_Column.Name 'quantity' Sort_Direction.Descending]) + r_2.at 'id' . to_vector . should_equal [3,1,4,5,6,2] Test.specify 'should allow sorting with specific by-column rules' <| - r_1 = df.sort by=['quantity', (Order_Rule 'price' order=Sort_Direction.Descending)] + r_1 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "quantity", Sort_Column.Name "price" Sort_Direction.Descending]) r_1.at 'id' . to_vector . should_equal [4,2,3,1,6,5] - Test.specify 'should return dataflow error when passed a non-existent column' <| - r = df.sort by='foobar' - r.should_fail_with No_Such_Column_Error + Test.specify 'should return warnings and errors when passed a non-existent column' <| + action = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'foobar']) on_problems=_ + tester table = + table.at 'id' . to_vector . should_equal [1,2,3,4,5,6] + problems = [Missing_Input_Columns [Sort_Column.Name 'foobar'], No_Input_Columns_Selected] + Problems.test_problem_handling action problems tester Test.specify 'should correctly reorder all kinds of columns and leave the original columns untouched' <| ints = [1, 2, 3, 4, 5] @@ -291,7 +254,7 @@ spec prefix connection pending=Nothing = texts = ["foo", "foo", "bar", "baz", "spam"] df = upload "T8" <| Materialized_Table.new [["ord", [0,3,2,4,1]], ["ints", ints], ["reals", reals], ["bools", bools], ["texts", texts]] - r = df.sort by='ord' + r = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'ord']) r.at 'ints' . to_vector . should_equal [1, 5, 3, 2, 4] df.at 'ints' . to_vector . 
should_equal ints @@ -309,22 +272,17 @@ spec prefix connection pending=Nothing = c = df.at 'rating' r_1 = c.sort - r_1.to_vector.should_equal [2.2, 3.0, 3.0, 7.3, Nothing, Nothing] + r_1.to_vector.should_equal [Nothing, Nothing, 2.2, 3.0, 3.0, 7.3] r_2 = c.sort order=Sort_Direction.Descending r_2.to_vector.should_equal [7.3, 3.0, 3.0, 2.2, Nothing, Nothing] - r_3 = c.sort order=Sort_Direction.Descending missing_last=False - r_3.to_vector.should_equal [Nothing, Nothing, 7.3, 3.0, 3.0, 2.2] - Test.group prefix+"Index" pending=pending <| t0 = upload "Tix" <| Materialized_Table.new [["ix", [1,2,3]], ["c1", [4,5,6]]] t = t0.set_index 'ix' Test.specify "should be accessible by `at` like other columns" <| t.at 'ix' . to_vector . should_equal t.index.to_vector - Test.specify "should be accessible by `select` like other columns" <| - t.select ['ix'] . columns . first . to_vector . should_equal t.index.to_vector Test.specify "treated as a column indexed by itself should still correctly compute values" <| col = t.index+10 vec = [11, 12, 13] @@ -360,7 +318,7 @@ spec prefix connection pending=Nothing = (InMemory) table are ordered according to a specified column or list of columns. 
determinize_by order_column table = - table.sort by=order_column + table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name order_column]) Test.specify "should allow counting group sizes and elements" <| aggregates = [Count Nothing, Count_Not_Nothing "price", Count_Nothing "price"] diff --git a/test/Table_Tests/src/Table_Spec.enso b/test/Table_Tests/src/Table_Spec.enso index 82fbf7b9e97f..feb720fb5287 100644 --- a/test/Table_Tests/src/Table_Spec.enso +++ b/test/Table_Tests/src/Table_Spec.enso @@ -1,5 +1,7 @@ from Standard.Base import all from Standard.Table import all +import Standard.Table.Data.Sort_Column_Selector +import Standard.Table.Data.Sort_Column from Standard.Table.Data.Table as Table_Internal import Empty_Error @@ -8,7 +10,7 @@ import Standard.Table.Data.Storage import Standard.Test import Standard.Test.Problems import Standard.Visualization -from Standard.Table.Error as Table_Errors import Invalid_Output_Column_Names, Duplicate_Output_Column_Names +from Standard.Table.Error as Table_Errors import Invalid_Output_Column_Names, Duplicate_Output_Column_Names, No_Input_Columns_Selected, Missing_Input_Columns import project.Common_Table_Spec @@ -321,26 +323,6 @@ spec = i.at "Items Count" . to_vector . should_equal [3, 2, 4] i.at "Storage Type" . to_vector . should_equal [Storage.Text, Storage.Integer, Storage.Any] - Test.group "Aggregation" <| - name = ['name', ["foo", "bar", "foo", "baz", "foo", "bar", "quux"]] - price = ['price', [0.4, 3.5, Nothing, 6.7, Nothing, 97, Nothing]] - quantity = ['quantity', [10, 20, 30, 40, 50, 60, 70]] - t = Table.new [name, price, quantity] - agg = t.group by='name' - - Test.specify "should allow counting group sizes" <| - agg.count.to_vector.should_equal [3, 2, 1, 1] - - Test.specify "should allow aggregating columns with basic arithmetic aggregators" <| - agg.at 'price' . mean . to_vector . should_equal [0.4, 50.25, 6.7, Nothing] - agg.at 'price' . min . to_vector . 
should_equal [0.4, 3.5, 6.7, Nothing] - - Test.specify "should allow aggregating with user-defined aggregate functions" <| - median vec = - sorted = vec.sort - if sorted.is_empty then Nothing else sorted.at (sorted.length-1 / 2).floor - agg.at 'quantity' . reduce median . to_vector . should_equal [30, 20, 40, 70] - Test.group "Column-wide statistics" <| Test.specify 'should allow computing basic column-wide stats' <| price = Column.from_vector 'price' [0.4, 3.5, Nothing, 6.7, Nothing, 97, Nothing] @@ -353,54 +335,36 @@ spec = df = (Enso_Project.data / "clothes.csv").read Test.specify "should allow sorting by a single column name" <| - r_1 = df.sort by="Quantity" + r_1 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "Quantity"]) r_1.at 'Id' . to_vector . should_equal [2,4,1,3,5,6] - r_2 = df.sort by="Rating" missing_last=False - r_2.at 'Id' . to_vector . should_equal [2,6,5,1,4,3] - - r_3 = df.sort by="Rating" missing_last=False order=Sort_Direction.Descending - r_3.at 'Id' . to_vector . should_equal [2,6,3,1,4,5] + r_3 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "Rating" Sort_Direction.Descending]) + r_3.at 'Id' . to_vector . should_equal [3,1,4,5,2,6] Test.specify 'should allow sorting by multiple column names' <| - r_1 = df.sort by=['Quantity', 'Rating'] - r_1.at 'Id' . to_vector . should_equal [4,2,1,3,5,6] - - r_2 = df.sort by=['Rating', 'Quantity'] missing_last=False order=Sort_Direction.Descending - r_2.at 'Id' . to_vector . should_equal [6,2,3,1,4,5] - - Test.specify 'should allow sorting by external columns' <| - quality_ratio = df.at 'Rating' / df.at 'Price' - - r_1 = df.sort by=quality_ratio - r_1.at 'Id' . to_vector . should_equal [4,1,3,5,2,6] + r_1 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'Quantity', Sort_Column.Name 'Rating']) + r_1.at 'Id' . to_vector . should_equal [2,4,1,3,6,5] - r_2 = df.sort by=['Quantity', quality_ratio] - r_2.at 'Id' . to_vector . 
should_equal [4,2,1,3,5,6] + r_2 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'Rating' Sort_Direction.Descending, Sort_Column.Name 'Quantity' Sort_Direction.Descending]) + r_2.at 'Id' . to_vector . should_equal [3,1,4,5,6,2] Test.specify 'should allow sorting with specific by-column rules' <| - r_1 = df.sort by=['Quantity', (Order_Rule 'Price' order=Sort_Direction.Descending)] + r_1 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "Quantity", Sort_Column.Name "Price" Sort_Direction.Descending]) r_1.at 'Id' . to_vector . should_equal [4,2,3,1,6,5] Test.specify 'should respect defined comparison operations for custom types' <| c_1 = ['id', [1, 2, 3, 4, 5, 6]] c_2 = ['val', [My 1 2, My 3 4, My 2 1, My 5 2, My 7 0, My 4 -1]] df = Table.new [c_1, c_2] - r = df.sort by='val' + r = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'val']) r.at 'id' . to_vector . should_equal [1,3,6,2,4,5] - Test.specify 'should allow passing a custom comparator per column and should missing-proof it' <| - c_1 = ['id', [1, 2, 3, 4, 5, 6]] - c_2 = ['val', [My 1 2, My 2 5, My 3 4, My 6 3, Nothing, My 1 0]] - df = Table.new [c_1, c_2] - - cmp a b = (a.x-a.y).abs . compare_to (b.x-b.y).abs - r = df.sort by=(Order_Rule 'val' comparator=cmp) - r.at 'id' . to_vector . should_equal [1,3,6,2,4,5] - - Test.specify 'should return dataflow error when passed a non-existent column' <| - r = df.sort by='foobar' - r.should_fail_with No_Such_Column_Error + Test.specify 'should return warnings and errors when passed a non-existent column' <| + action = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'foobar']) on_problems=_ + tester table = + table.at 'Id' . to_vector . 
should_equal [1,2,3,4,5,6] + problems = [Missing_Input_Columns [Sort_Column.Name 'foobar'], No_Input_Columns_Selected] + Problems.test_problem_handling action problems tester Test.specify 'should correctly reorder all kinds of columns and leave the original columns untouched' <| ord = [0, 3, 2, 4, 1] @@ -411,7 +375,7 @@ spec = objs = [Cons 1 2, Cons 2 3, Cons 6 7, Cons 8 9, Cons 10 30] df = Table.new [['ord', ord], ['ints', ints], ['reals', reals], ['bools', bools], ['texts', texts], ['objs', objs]] - r = df.sort by='ord' + r = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'ord']) r.at 'ints' . to_vector . should_equal [1, 5, 3, 2, 4] df.at 'ints' . to_vector . should_equal ints @@ -507,9 +471,6 @@ spec = i = t.index c.to_vector . should_equal i.to_vector - Test.specify "should be accessible by `select` like other columns" <| - t.select ['ix'] . columns . first . to_vector . should_equal t.index.to_vector - Test.specify "should be able to be set by column" <| with_index = t.set_index c with_index.index.to_vector . 
should_equal c.to_vector diff --git a/test/Tests/src/Main.enso b/test/Tests/src/Main.enso index 1fc32761354c..bc9e8af8f923 100644 --- a/test/Tests/src/Main.enso +++ b/test/Tests/src/Main.enso @@ -60,8 +60,6 @@ import project.System.File_Spec import project.System.Process_Spec import project.System.Reporting_Stream_Decoder_Spec -import project.Examples_Spec - main = Test.Suite.run_main <| Any_Spec.spec Array_Spec.spec @@ -70,7 +68,6 @@ main = Test.Suite.run_main <| Conversion_Spec.spec Deep_Export_Spec.spec Error_Spec.spec - Examples_Spec.spec File_Spec.spec Reporting_Stream_Decoder_Spec.spec Http_Header_Spec.spec diff --git a/test/Visualization_Tests/src/Table_Spec.enso b/test/Visualization_Tests/src/Table_Spec.enso index d566b8c899fb..2251e52eed19 100644 --- a/test/Visualization_Tests/src/Table_Spec.enso +++ b/test/Visualization_Tests/src/Table_Spec.enso @@ -3,6 +3,7 @@ from Standard.Base import all from Standard.Database import all import Standard.Database.Data.Table as Database_Table import Standard.Table.Data.Table as Dataframe_Table +from Standard.Table.Data.Aggregate_Column import Group_By, Average import Standard.Visualization.Table.Visualization as Visualization import Standard.Test @@ -47,19 +48,9 @@ visualization_spec connection = json = make_json header=["A"] data=[['a', 'a']] all_rows=3 ixes_header=[] ixes=[] vis . should_equal json - g = t.group by=["A", "B"] . at "C" . mean + g = t.aggregate [Group_By "A", Group_By "B", Average "C"] . at "Average C" vis2 = Visualization.prepare_visualization g 1 - json2 = make_json header=["C_mean"] data=[[4]] all_rows=2 ixes_header=["A", "B"] ixes=[['a'], [2]] - vis2 . should_equal json2 - - Test.specify "should visualize database aggregates" <| - agg = t.group by="A" - vis = Visualization.prepare_visualization agg 1 - json = make_json header=["B", "C"] data=[[2], [3]] all_rows=3 ixes_header=["A"] ixes=[['a']] - vis . 
should_equal json - - vis2 = Visualization.prepare_visualization (agg.at "C") 1 - json2 = make_json header=["C"] data=[[3]] all_rows=3 ixes_header=["A"] ixes=[['a']] + json2 = make_json header=["Average C"] data=[[4.0]] all_rows=2 ixes_header=[] ixes=[] vis2 . should_equal json2 t2 = Dataframe_Table.new [["A", [1, 2, 3]], ["B", [4, 5, 6]], ["C", [7, 8, 9]]] @@ -78,21 +69,6 @@ visualization_spec connection = json = make_json header=["A"] data=[[1, 2]] all_rows=3 ixes_header=[""] ixes=[[0, 1]] vis . should_equal json - g = t2.group by="A" . at "C" . mean - vis2 = Visualization.prepare_visualization g 1 - json2 = make_json header=["C_mean"] data=[[7.0]] all_rows=3 ixes_header=["A"] ixes=[[1]] - vis2 . should_equal json2 - - Test.specify "should visualize dataframe aggregates" <| - agg = t2.group by="A" - vis = Visualization.prepare_visualization agg 1 - json = make_json header=["B", "C"] data=[[4], [7]] all_rows=3 ixes_header=["A"] ixes=[[1]] - vis . should_equal json - - vis2 = Visualization.prepare_visualization (agg.at "C") 1 - json2 = make_json header=["C"] data=[[7]] all_rows=3 ixes_header=["A"] ixes=[[1]] - vis2 . should_equal json2 - Test.specify "should handle Vectors" <| vis = Visualization.prepare_visualization [1, 2, 3] 2