From 57cb811dbf7a4e18a5621cee4fb6cb7afccd2243 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Thu, 9 Jun 2022 11:16:38 +0100 Subject: [PATCH 01/23] Tidy up part 1 --- .../Standard/Table/0.0.0-dev/src/Data/Table.enso | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso index ec89de325557..9b8f34772bac 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso @@ -748,20 +748,6 @@ type Table Nothing -> Error.throw No_Index_Set_Error i -> Column.Column i - ## Alias Select Columns - - Selects a subset of columns from this table by name. - - > Example - Get the item name and price columns from the shop inventory. - - import Standard.Examples - - example_select = - Examples.inventory_table.select ["item_name", "price"] - select : Vector -> Table - select columns = Table (this.java_table.selectColumns columns.to_array) - ## ALIAS Join Table Efficiently joins two tables based on either the index or the specified From cedb82035883503bc117488243a2800dd3623326 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Thu, 9 Jun 2022 14:38:13 +0100 Subject: [PATCH 02/23] Retire old `select` methods. 
--- .../Standard/Database/0.0.0-dev/src/Data/Table.enso | 13 ------------- .../0.0.0-dev/src/Data_Science/Preparation.enso | 3 ++- .../0.0.0-dev/src/Table/Visualization.enso | 5 +++-- test/Table_Tests/src/Database/Codegen_Spec.enso | 5 +++-- test/Table_Tests/src/Database/Common_Spec.enso | 11 +++++++---- test/Table_Tests/src/Table_Spec.enso | 3 --- 6 files changed, 15 insertions(+), 25 deletions(-) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso index fdb28b90f287..353cf9ade123 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso @@ -567,19 +567,6 @@ type Table new_ctx = this.context.set_orders elems this.updated_context new_ctx - ## UNSTABLE - - Selects a subset of columns from this table by name. - - Arguments: - - columns: The names of the columns to select from the table. - select : Vector Text -> Table - select columns = - candidates = this.internal_columns + this.context.meta_index - find_col name = candidates.find (p -> p.name == name) - selected_cols = columns.map (find_col >> .catch) . filter (c -> c.is_nothing.not) - this.updated_columns selected_cols - ## UNSTABLE Efficiently joins two tables based on either the index or a key column. diff --git a/distribution/lib/Standard/Searcher/0.0.0-dev/src/Data_Science/Preparation.enso b/distribution/lib/Standard/Searcher/0.0.0-dev/src/Data_Science/Preparation.enso index e03e572a4ab7..4e2778e5c45e 100644 --- a/distribution/lib/Standard/Searcher/0.0.0-dev/src/Data_Science/Preparation.enso +++ b/distribution/lib/Standard/Searcher/0.0.0-dev/src/Data_Science/Preparation.enso @@ -11,9 +11,10 @@ Get the item name and price columns from the shop inventory. 
import Standard.Examples + from Standard.Table.Data.Column_Selector as Column_Selector_Module import By_Name example_select = - Examples.inventory_table.select ["item_name", "price"] + Examples.inventory_table.select_columns (By_Name ["item_name", "price"]) > Example Remove any rows that contain missing values from the table. diff --git a/distribution/lib/Standard/Visualization/0.0.0-dev/src/Table/Visualization.enso b/distribution/lib/Standard/Visualization/0.0.0-dev/src/Table/Visualization.enso index e49840fd1546..49057ba12b9e 100644 --- a/distribution/lib/Standard/Visualization/0.0.0-dev/src/Table/Visualization.enso +++ b/distribution/lib/Standard/Visualization/0.0.0-dev/src/Table/Visualization.enso @@ -1,4 +1,5 @@ from Standard.Base import all +from Standard.Table.Data.Column_Selector as Column_Selector_Module import By_Name import Standard.Table.Data.Table as Dataframe_Table import Standard.Table.Data.Column as Dataframe_Column @@ -31,8 +32,8 @@ prepare_visualization x max_rows = Helpers.recover_errors <| case x of # Materialize a table with indices as normal columns (because dataframe does not support multi-indexing). df = x.reset_index.to_dataframe max_rows # Then split into actual columns and indices. - vis_df = df.select (x.columns.map .name) - indices = df.select (x.indices.map .name) . columns + vis_df = df.select_columns (By_Name (x.columns.map .name)) + indices = df.select_columns (By_Name (x.indices.map .name)) . 
columns all_rows_count = x.row_count here.make_json vis_df indices all_rows_count diff --git a/test/Table_Tests/src/Database/Codegen_Spec.enso b/test/Table_Tests/src/Database/Codegen_Spec.enso index 4da2c8f66dc8..8ed3596312ce 100644 --- a/test/Table_Tests/src/Database/Codegen_Spec.enso +++ b/test/Table_Tests/src/Database/Codegen_Spec.enso @@ -1,4 +1,5 @@ from Standard.Base import all +from Standard.Table.Data.Column_Selector as Column_Selector_Module import By_Name import project.Database.Helpers.Fake_Test_Connection import Standard.Database.Data.Dialect @@ -38,7 +39,7 @@ spec = Test.group "[Codegen] Basic Select" <| Test.specify "should select columns from a table" <| t1.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1"', []] - t2 = t1.select ["C", "B", "undefined"] + t2 = t1.select_columns (By_Name ["C", "B", "undefined"]) t2.to_sql.prepare . should_equal ['SELECT "T1"."C" AS "C", "T1"."B" AS "B" FROM "T1" AS "T1"', []] foo = t1.at "A" . rename "FOO" @@ -55,7 +56,7 @@ spec = t2.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" LIMIT 5', []] Test.specify "should work correctly when there are no columns" <| - empty = t1.select [] + empty = t1.select_columns (By_Name []) json = Json.from_pairs [["query", Nothing], ["message", "The table has no columns so a query cannot be generated."]] empty.to_json . should_equal json empty.columns.length . 
should_equal 0 diff --git a/test/Table_Tests/src/Database/Common_Spec.enso b/test/Table_Tests/src/Database/Common_Spec.enso index 7395898473e8..2cca05a11729 100644 --- a/test/Table_Tests/src/Database/Common_Spec.enso +++ b/test/Table_Tests/src/Database/Common_Spec.enso @@ -1,5 +1,8 @@ from Standard.Base import all +from Standard.Table.Data.Column_Selector as Column_Selector_Module import By_Name + from Standard.Database import all + import Standard.Table.Data.Table as Materialized_Table import Standard.Test import project.Database.Helpers.Name_Generator @@ -46,7 +49,7 @@ spec prefix connection pending=Nothing = ix2.name . should_equal 'a' ix2.to_vector . should_equal [1, 4] Test.specify "should work correctly when there are no columns" <| - empty = t1.select [] + empty = t1.select_columns (By_Name []) empty.to_dataframe.columns.length . should_equal 0 empty.to_dataframe.row_count . should_equal empty.row_count Test.specify "should handle bigger result sets" <| @@ -159,7 +162,7 @@ spec prefix connection pending=Nothing = ta_2 = ta.set_index "id" tb_2 = tb.set_index "id" res = (tc.join ta_2 on="id_a") . join tb_2 on="id_b" left_suffix="_a" right_suffix="_b" - sel = res.select ["name_a", "name_b"] + sel = res.select_columns (By_Name ["name_a", "name_b"]) df = sel.to_dataframe . sort by="name_a" df . at "name_a" . to_vector . should_equal ["Foo", "Hmm"] df . at "name_b" . to_vector . should_equal ["Bar", "Hmm"] @@ -323,8 +326,8 @@ spec prefix connection pending=Nothing = t = t0.set_index 'ix' Test.specify "should be accessible by `at` like other columns" <| t.at 'ix' . to_vector . should_equal t.index.to_vector - Test.specify "should be accessible by `select` like other columns" <| - t.select ['ix'] . columns . first . to_vector . should_equal t.index.to_vector + Test.specify "should be accessible by `select_columns` like other columns" <| + t.select_columns (By_Name ['ix']) . columns . first . to_vector . 
should_equal t.index.to_vector Test.specify "treated as a column indexed by itself should still correctly compute values" <| col = t.index+10 vec = [11, 12, 13] diff --git a/test/Table_Tests/src/Table_Spec.enso b/test/Table_Tests/src/Table_Spec.enso index 82fbf7b9e97f..6f0e7ab4465f 100644 --- a/test/Table_Tests/src/Table_Spec.enso +++ b/test/Table_Tests/src/Table_Spec.enso @@ -507,9 +507,6 @@ spec = i = t.index c.to_vector . should_equal i.to_vector - Test.specify "should be accessible by `select` like other columns" <| - t.select ['ix'] . columns . first . to_vector . should_equal t.index.to_vector - Test.specify "should be able to be set by column" <| with_index = t.set_index c with_index.index.to_vector . should_equal c.to_vector From c0f89c3b70edd30a018a4d1e4fdce491b7235dcf Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Thu, 9 Jun 2022 14:49:01 +0100 Subject: [PATCH 03/23] Remove old `group` function. --- .../Database/0.0.0-dev/src/Data/Table.enso | 21 ----------- .../Table/0.0.0-dev/src/Data/Table.enso | 27 -------------- .../Table_Tests/src/Database/Common_Spec.enso | 35 ------------------- test/Table_Tests/src/Table_Spec.enso | 20 ----------- 4 files changed, 103 deletions(-) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso index 353cf9ade123..253d6c6940ce 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso @@ -660,27 +660,6 @@ type Table Table new_table_name this.connection new_columns new_ctx - ## UNSTABLE - - Returns an aggregate table resulting from grouping the elements by the - value of the specified column. - - Arguments: - - by: The column names on which to group. If this is not set, the index - will be used for grouping instead. 
- group : Vector Text | Text | Nothing -> Aggregate_Table - group by=Nothing = Panic.recover Any <| - cols = case by of - Nothing -> - if this.context.meta_index.is_empty then Panic.throw <| Illegal_State_Error "Trying to group by an empty index." else - this.context.meta_index - _ -> - - Helpers.unify_vector_singleton by . map (this.resolve >> .as_internal) - exprs = cols.map .expression - new_ctx = this.context.set_groups exprs . set_index cols - Aggregate_Table this.name this.connection this.internal_columns new_ctx - ## Prototype Group By function aggregate : [Aggregate_Column] -> Problem_Behavior -> Table aggregate columns (on_problems=Report_Warning) = diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso index 9b8f34772bac..b96b2fa780ab 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso @@ -861,33 +861,6 @@ type Table cols = this.columns here.new [["Column", cols.map .name], ["Items Count", cols.map .count], ["Storage Type", cols.map .storage_type]] . set_index "Column" - ## ALIAS Group a Table - - Returns an aggregate table resulting from grouping the elements by the - value of the specified column. - - Arguments: - - by: The column in the table to perform grouping by. If this argument - is not set, the index is used for grouping instead. - - > Example - Compute the number of transactions that each item has participated in, - as well as the number of each item sold across those transactions. - - import Standard.Examples - import Standard.Table - - example_group = - transactions = Examples.transactions_table - item_names = Examples.inventory_table.at "item_name" - aggregated = transactions.group by="item_id" - num_transactions = aggregated.at "transaction_id" . reduce .length . rename "transaction_count" - num_sold = aggregated.at "quantity" . reduce .sum . 
rename "num_sold" - Table.join [item_names, num_transactions, num_sold] - group : Text | Nothing -> Aggregate_Table - group by=Nothing = - Aggregate_Table (this.java_table.group by) - ## ALIAS Sort Table UNSTABLE diff --git a/test/Table_Tests/src/Database/Common_Spec.enso b/test/Table_Tests/src/Database/Common_Spec.enso index 2cca05a11729..f2740a26feb2 100644 --- a/test/Table_Tests/src/Database/Common_Spec.enso +++ b/test/Table_Tests/src/Database/Common_Spec.enso @@ -204,41 +204,6 @@ spec prefix connection pending=Nothing = empty.columns.length . should_equal 0 empty.to_dataframe.columns.length . should_equal 0 - Test.group prefix+"Old Aggregation" pending=pending <| - t = upload "T6" <| - Materialized_Table.new [["name", ["foo", "bar", "foo", "baz", "foo", "bar", "quux"]], ["price", [0.4, 3.5, Nothing, 6.7, Nothing, 97, Nothing]], ["quantity", [10, 20, 30, 40, 50, 60, 70]]] - agg = t.group by='name' - ## A helper which makes sure that the groups are ordered according to the index, using the Table library - determinize col = - df = col.to_dataframe.to_table - df.sort by=df.index . at col.name - - Test.specify "should allow counting group sizes" <| - determinize agg.count . to_vector . should_equal [2, 1, 3, 1] - - Test.specify "should allow aggregating columns with basic arithmetic aggregators" <| - determinize (agg.at 'price' . mean) . to_vector . should_equal [50.25, 6.7, 0.4, Nothing] - determinize (agg.at 'price' . min) . to_vector . should_equal [3.5, 6.7, 0.4, Nothing] - determinize (agg.at 'price' . max) . to_vector . should_equal [97, 6.7, 0.4, Nothing] - - Test.specify "should allow to join multiple aggregations" <| - m1 = agg.at 'price' . mean - m2 = agg.at 'quantity' . max - df = (m1.join m2).to_dataframe - df2 = df.sort by=df.index - df2.at 'price_mean' . to_vector . should_equal [50.25, 6.7, 0.4, Nothing] - df2.at 'quantity_max' . to_vector . 
should_equal [60, 40, 50, 70] - - Test.specify "should correctly compute the result size" <| - m = agg.at 'price' . mean - m.length . should_equal m.to_vector.length - m.length . should_equal 4 - - Test.specify "should correctly count values" <| - m = agg.at 'price' . mean - m.count . should_equal 3 - m.count_missing . should_equal 1 - Test.group prefix+"Column-wide statistics" pending=pending <| Test.specify 'should allow computing basic column-wide stats' <| t7 = upload "T7" <| diff --git a/test/Table_Tests/src/Table_Spec.enso b/test/Table_Tests/src/Table_Spec.enso index 6f0e7ab4465f..44e31912c69c 100644 --- a/test/Table_Tests/src/Table_Spec.enso +++ b/test/Table_Tests/src/Table_Spec.enso @@ -321,26 +321,6 @@ spec = i.at "Items Count" . to_vector . should_equal [3, 2, 4] i.at "Storage Type" . to_vector . should_equal [Storage.Text, Storage.Integer, Storage.Any] - Test.group "Aggregation" <| - name = ['name', ["foo", "bar", "foo", "baz", "foo", "bar", "quux"]] - price = ['price', [0.4, 3.5, Nothing, 6.7, Nothing, 97, Nothing]] - quantity = ['quantity', [10, 20, 30, 40, 50, 60, 70]] - t = Table.new [name, price, quantity] - agg = t.group by='name' - - Test.specify "should allow counting group sizes" <| - agg.count.to_vector.should_equal [3, 2, 1, 1] - - Test.specify "should allow aggregating columns with basic arithmetic aggregators" <| - agg.at 'price' . mean . to_vector . should_equal [0.4, 50.25, 6.7, Nothing] - agg.at 'price' . min . to_vector . should_equal [0.4, 3.5, 6.7, Nothing] - - Test.specify "should allow aggregating with user-defined aggregate functions" <| - median vec = - sorted = vec.sort - if sorted.is_empty then Nothing else sorted.at (sorted.length-1 / 2).floor - agg.at 'quantity' . reduce median . to_vector . 
should_equal [30, 20, 40, 70] - Test.group "Column-wide statistics" <| Test.specify 'should allow computing basic column-wide stats' <| price = Column.from_vector 'price' [0.4, 3.5, Nothing, 6.7, Nothing, 97, Nothing] From 58319cc63c0f432274b95353bd37e94ea86dd777 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Thu, 9 Jun 2022 14:57:10 +0100 Subject: [PATCH 04/23] Remove old `Aggregate_Table` type. --- .../Database/0.0.0-dev/src/Data/Table.enso | 65 --------------- .../Standard/Examples/0.0.0-dev/src/Main.enso | 8 -- .../Table/0.0.0-dev/src/Data/Table.enso | 81 ------------------- .../0.0.0-dev/src/Table/Visualization.enso | 5 -- test/Tests/src/Examples_Spec.enso | 3 - 5 files changed, 162 deletions(-) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso index 253d6c6940ce..bde5bb1317ac 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso @@ -884,71 +884,6 @@ type Table False -> Error.throw <| Illegal_State_Error "The update unexpectedly affected "+affected_rows.to_text+" rows." True -> Nothing - -## Represents a table with grouped rows. -type Aggregate_Table - - ## UNSTABLE - - Represents a table with grouped rows. - - Arguments: - - name: The name of the table. - - connection: The connection with which the table is associated. - - internal_columns: The internal representation of the table columns. - - context: The context associated with this table. - # type Aggregate_Table (name : Text) (connection : Connection) - # (internal_columns : Vector [Text, IR.Expression]) - # (context : IR.Context) - type Aggregate_Table name connection internal_columns context - - ## UNSTABLE - - Returns a vector of aggregate columns in this table. - columns : Vector.Vector - columns = this.internal_columns . 
map this.make_column - - ## UNSTABLE - - Returns a column containing the number of elements in each group. - count : Column - count = - expr = IR.Operation "COUNT_ROWS" [] - # new_name connection expected_type expr context - Column_Module.lift_aggregate "count" this.connection Sql.Sql_Type.integer expr this.context - - ## UNSTABLE - - Returns an aggregate column with the given name, contained in this table. - - Arguments: - - name: The name of the aggregate column to get from the aggregate table. - at : Text -> Column ! No_Such_Column_Error - at name = - internal = this.internal_columns.find (p -> p.name == name) - this.make_column internal . map_error (_ -> No_Such_Column_Error name) - - ## PRIVATE - - Helper to create aggregate columns from internal columns. - - Arguments: - - internal: The internal column to make into an aggregate column. - make_column : Internal_Column -> Aggregate_Column_Builder - make_column internal = - Aggregate_Column_Builder internal.name this.connection internal.sql_type internal.expression this.context - - ## PRIVATE - - Helper that returns the underlying table from before grouping. - ungrouped : Table - ungrouped = - new_ctx = this.context.set_groups [] - new_cols = this.internal_columns.filter col-> - turned_into_index = this.context.meta_index.exists i-> i.name == col.name - turned_into_index.not - Table this.name this.connection new_cols new_ctx - type Integrity_Error ## UNSTABLE diff --git a/distribution/lib/Standard/Examples/0.0.0-dev/src/Main.enso b/distribution/lib/Standard/Examples/0.0.0-dev/src/Main.enso index 0fed3ca039fe..b838919ac333 100644 --- a/distribution/lib/Standard/Examples/0.0.0-dev/src/Main.enso +++ b/distribution/lib/Standard/Examples/0.0.0-dev/src/Main.enso @@ -278,14 +278,6 @@ transactions_table : Table.Table transactions_table = (Enso_Project.data / "food_shop_transactions.csv") . read -## An aggregate table for the relevant examples. 
-aggregate_table : Table.Aggregate_Table -aggregate_table = - transactions = here.transactions_table - item_names = here.inventory_table.at "item_name" - with_names = transactions.join item_names on="item_id" - with_names.group by="item_name" - ## An example regex match. match : Default_Engine.Match match = diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso index b96b2fa780ab..159a1e946319 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso @@ -1306,7 +1306,6 @@ type Table Format.Xlsx sheet mode header max -> this.write_xlsx file sheet mode header max Format.Json -> this.write_json file - ## UNSTABLE Used for converting arbitrary values into fields in CSV files. @@ -1348,86 +1347,6 @@ Text.write_to_spreadsheet cell = cell.setCellValue this which should be set by this method. Date.write_to_spreadsheet cell = cell.setCellValue this.internal_local_date - - -## Represents a table with grouped rows. -type Aggregate_Table - - ## PRIVATE - - A table type with grouped rows. - - Arguments: - - java_table: The internal representation of the table. - type Aggregate_Table java_table - - ## Returns a vector of aggregate columns in this table. - - > Example - Get a vector of aggregate columns from this table. - - import Standard.Examples - - example_columns = Examples.aggregate_table.columns - columns : Vector.Vector - columns = Vector.Vector this.java_table.getColumns . map Column.Aggregate_Column - - ## Returns a table containing columns resulting from calling `values` on - each column in `this`. - - > Example - Get the values table from an aggregate table. - - import Standard.Examples - - example_values = Examples.aggregate_table.values - values : Table - values = this.columns . map (_.values name_suffix='') . 
reduce .join - - ## Returns a column containing the number of elements in each group of the - aggregate table. - - > Examples - Get the counts for an aggregate table. - - import Standard.Examples - - example_count = Examples.aggregate_table.count - count : Column - count = Column.Column this.java_table.count - - ## ALIAS Get a Column - - Returns an aggregate column with the given name, contained in this table. - - Arguments: - - name: The name of the aggregate column to get. - - > Example - Get the transaction ids column from the aggregate table. - - import Standard.Examples - - example_at = Examples.aggregate_table.at "transaction_id" - at : Text -> Column ! No_Such_Column_Error - at name = case this.java_table.getColumnByName name of - Nothing -> Error.throw (No_Such_Column_Error name) - c -> Column.Aggregate_Column c - - ## Prints an ASCII-art table with this data to the standard output. - - Arguments: - - show_rows: the number of initial rows that should be displayed. - - > Example - Pretty-print and display an aggregate table in the console. - - import Standard.Examples - - example_print = Examples.aggregate_table.print - print : Integer -> Nothing - print show_rows=10 = this.values.print show_rows - ## UNSTABLE An error returned when a non-existent column is being looked up. diff --git a/distribution/lib/Standard/Visualization/0.0.0-dev/src/Table/Visualization.enso b/distribution/lib/Standard/Visualization/0.0.0-dev/src/Table/Visualization.enso index 49057ba12b9e..86f427d90605 100644 --- a/distribution/lib/Standard/Visualization/0.0.0-dev/src/Table/Visualization.enso +++ b/distribution/lib/Standard/Visualization/0.0.0-dev/src/Table/Visualization.enso @@ -44,14 +44,9 @@ prepare_visualization x max_rows = Helpers.recover_errors <| case x of here.prepare_visualization x.to_table max_rows # We display aggregates as their ungrouped counterparts. 
- Dataframe_Table.Aggregate_Table _ -> - ungrouped = Dataframe_Table.Table x.java_table.getUnderlyingTable - here.prepare_visualization ungrouped max_rows Dataframe_Column.Aggregate_Column _ -> ungrouped = Dataframe_Column.Column x.java_column.getColumn here.prepare_visualization ungrouped.to_table max_rows - Database_Table.Aggregate_Table _ _ _ _ -> - here.prepare_visualization x.ungrouped max_rows Database_Column.Aggregate_Column_Builder _ _ _ _ _ -> here.prepare_visualization x.ungrouped.to_table max_rows diff --git a/test/Tests/src/Examples_Spec.enso b/test/Tests/src/Examples_Spec.enso index 34acbc2125bd..89724e901aac 100644 --- a/test/Tests/src/Examples_Spec.enso +++ b/test/Tests/src/Examples_Spec.enso @@ -120,9 +120,6 @@ spec = Test.group "Examples" <| Examples.popularity_table Examples.transactions_table - Test.specify "should provide an aggregate table" <| - Examples.aggregate_table - Test.specify "should provide an example of a regex match" <| match = Examples.match match.groups.length . should_equal 5 From bb9e144f6871717d272f39a258ddd42ce584f6da Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Thu, 9 Jun 2022 15:03:13 +0100 Subject: [PATCH 05/23] Remove Java `AggregateTable` type. 
--- .../java/org/enso/table/data/table/Table.java | 6 -- .../data/table/aggregate/AggregateColumn.java | 58 ------------------ .../data/table/aggregate/AggregateTable.java | 59 ------------------- 3 files changed, 123 deletions(-) delete mode 100644 std-bits/table/src/main/java/org/enso/table/data/table/aggregate/AggregateColumn.java delete mode 100644 std-bits/table/src/main/java/org/enso/table/data/table/aggregate/AggregateTable.java diff --git a/std-bits/table/src/main/java/org/enso/table/data/table/Table.java b/std-bits/table/src/main/java/org/enso/table/data/table/Table.java index 880b446498d2..ae1e5155d5c2 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/table/Table.java +++ b/std-bits/table/src/main/java/org/enso/table/data/table/Table.java @@ -10,7 +10,6 @@ import org.enso.table.data.index.Index; import org.enso.table.data.index.MultiValueIndex; import org.enso.table.data.mask.OrderMask; -import org.enso.table.data.table.aggregate.AggregateTable; import org.enso.table.data.table.problems.AggregatedProblems; import org.enso.table.error.NoSuchColumnException; import org.enso.table.error.UnexpectedColumnTypeException; @@ -468,11 +467,6 @@ private Table hconcat(Table other, String lsuffix, String rsuffix) { return new Table(newColumns, index); } - public AggregateTable group(String by) { - Table t = by == null ? 
this : indexFromColumn(by); - return new AggregateTable(t); - } - /** @return a copy of the Column containing a slice of the original data */ public Table slice(int offset, int limit) { Column[] newColumns = new Column[columns.length]; diff --git a/std-bits/table/src/main/java/org/enso/table/data/table/aggregate/AggregateColumn.java b/std-bits/table/src/main/java/org/enso/table/data/table/aggregate/AggregateColumn.java deleted file mode 100644 index 8aa00e26e62f..000000000000 --- a/std-bits/table/src/main/java/org/enso/table/data/table/aggregate/AggregateColumn.java +++ /dev/null @@ -1,58 +0,0 @@ -package org.enso.table.data.table.aggregate; - -import org.enso.table.data.column.operation.aggregate.Aggregator; -import org.enso.table.data.index.Index; -import org.enso.table.data.table.Column; - -import java.util.List; -import java.util.function.Function; -import java.util.stream.IntStream; - -/** A column wrapper used for aggregation operations. */ -public class AggregateColumn { - private final Index uniqueIndex; - private final Column column; - - /** - * Creates a new column - * - * @param uniqueIndex the unique index obtained from the column's index - * @param column the wrapped column - */ - public AggregateColumn(Index uniqueIndex, Column column) { - this.uniqueIndex = uniqueIndex; - this.column = column; - } - - /** - * Aggregates the groups using a given aggregation operation. - * - * @param aggName name of a vectorized operation that can be used if possible. If null is passed, - * this parameter is unused. - * @param outSuffix a string appended to the name of the resulting column. - * @param aggregatorFunction the function to use if a vectorized operation is not available. - * @param skipNa whether missing values should be passed to the {@code fallback} function. - * @return a column indexed by the unique index of this aggregate, storing results of applying the - * specified operation. 
- */ - public Column aggregate( - String aggName, - String outSuffix, - Function, Object> aggregatorFunction, - boolean skipNa) { - Aggregator aggregator = - column.getStorage().getAggregator(aggName, aggregatorFunction, skipNa, uniqueIndex.size()); - - for (int i = 0; i < uniqueIndex.size(); i++) { - IntStream ixes = - column.getIndex().loc(uniqueIndex.iloc(i)).stream().mapToInt(Integer::intValue); - aggregator.nextGroup(ixes); - } - return new Column(column.getName() + outSuffix, uniqueIndex, aggregator.seal()); - } - - /** @return the underlying (ungrouped) column. */ - public Column getColumn() { - return column; - } -} diff --git a/std-bits/table/src/main/java/org/enso/table/data/table/aggregate/AggregateTable.java b/std-bits/table/src/main/java/org/enso/table/data/table/aggregate/AggregateTable.java deleted file mode 100644 index 229345013bcc..000000000000 --- a/std-bits/table/src/main/java/org/enso/table/data/table/aggregate/AggregateTable.java +++ /dev/null @@ -1,59 +0,0 @@ -package org.enso.table.data.table.aggregate; - -import org.enso.table.data.column.storage.LongStorage; -import org.enso.table.data.index.Index; -import org.enso.table.data.table.Column; -import org.enso.table.data.table.Table; - -import java.util.Arrays; -import java.util.List; - -/** Represents a table grouped by a given index. */ -public class AggregateTable { - private final Table table; - private final Index uniqueIndex; - - /** @param table the underlying table */ - public AggregateTable(Table table) { - this.table = table; - this.uniqueIndex = table.getIndex().unique(); - } - - /** @return a column containing group sizes in this aggregate. */ - public Column count() { - long[] counts = new long[uniqueIndex.size()]; - for (int i = 0; i < uniqueIndex.size(); i++) { - List items = table.getIndex().loc(uniqueIndex.iloc(i)); - counts[i] = items == null ? 
0 : items.size(); - } - LongStorage storage = new LongStorage(counts); - return new Column("count", uniqueIndex, storage); - } - - /** - * Returns a column with the given name. - * - * @param n the column name - * @return column with the given name or null if does not exist - */ - public AggregateColumn getColumnByName(String n) { - Column c = table.getColumnByName(n); - if (c == null) { - return null; - } else { - return new AggregateColumn(uniqueIndex, c); - } - } - - /** @return Aggregate columns contained in this table. */ - public AggregateColumn[] getColumns() { - return Arrays.stream(table.getColumns()) - .map(c -> new AggregateColumn(uniqueIndex, c)) - .toArray(AggregateColumn[]::new); - } - - /** @return the underlying (ungrouped) table. */ - public Table getUnderlyingTable() { - return table; - } -} From c8919c90bf8e101dfed1485add00e9b0205f436f Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Thu, 9 Jun 2022 15:12:00 +0100 Subject: [PATCH 06/23] Fix a broken test. --- test/Table_Tests/src/Database/Codegen_Spec.enso | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/Table_Tests/src/Database/Codegen_Spec.enso b/test/Table_Tests/src/Database/Codegen_Spec.enso index 8ed3596312ce..1100142f6130 100644 --- a/test/Table_Tests/src/Database/Codegen_Spec.enso +++ b/test/Table_Tests/src/Database/Codegen_Spec.enso @@ -39,7 +39,7 @@ spec = Test.group "[Codegen] Basic Select" <| Test.specify "should select columns from a table" <| t1.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1"', []] - t2 = t1.select_columns (By_Name ["C", "B", "undefined"]) + t2 = t1.select_columns (By_Name ["C", "B", "undefined"]) reorder=True t2.to_sql.prepare . should_equal ['SELECT "T1"."C" AS "C", "T1"."B" AS "B" FROM "T1" AS "T1"', []] foo = t1.at "A" . 
rename "FOO" From be08e1dc68f9bd0be8924a0e8f42e686af56f103 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Thu, 9 Jun 2022 15:29:37 +0100 Subject: [PATCH 07/23] Start removing `sort` method and `Order_Rule`. --- .../Table/0.0.0-dev/src/Data/Order_Rule.enso | 31 --- .../Table/0.0.0-dev/src/Data/Table.enso | 190 ++++-------------- .../Standard/Table/0.0.0-dev/src/Main.enso | 2 - 3 files changed, 38 insertions(+), 185 deletions(-) delete mode 100644 distribution/lib/Standard/Table/0.0.0-dev/src/Data/Order_Rule.enso diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Order_Rule.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Order_Rule.enso deleted file mode 100644 index aaaee19f0a1c..000000000000 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Order_Rule.enso +++ /dev/null @@ -1,31 +0,0 @@ -from Standard.Base import all - -type Order_Rule - - ## UNSTABLE - - A rule used for sorting table-like structures. - - Arguments: - - column: a value representing the data dimension by which this rule is - sorting. This type does not specify the underlying representation of a - column, assuming that the sorting engine defines its own column - representation. - - comparator: a function taking two elements of the data being sorted on - and returning an `Ordering`. The function may be `Nothing`, in which - case a natural ordering will be used. Note that certain table backends - (such us database connectors) may not support this field being set to a - non-`Nothing` value. - - order: specifies whether the table should be sorted in an ascending or - descending order. The default value of `Nothing` delegates the decision - to the sorting function. Can be set to `Sort_Direction.Ascending` or - `Sort_Direction.Descending` from the `Base` library, to specify the - ordering. - - missing_last: whether the missing values should be placed at the - beginning or end of the sorted table. Note that this argument is - independent from `order`, i.e. 
missing values will always be sorted - according to this rule, ignoring the ascending / descending setting. - The default value of `Nothing` delegates the decision to the sorting - function. - type Order_Rule column comparator=Nothing order=Nothing missing_last=Nothing - diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso index 159a1e946319..67d6ef4611cb 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso @@ -13,7 +13,6 @@ import Standard.Table.Internal.Parse_Values_Helper import Standard.Table.Internal.Delimited_Reader import Standard.Table.Internal.Problem_Builder -from Standard.Table.Data.Order_Rule as Order_Rule_Module import Order_Rule from Standard.Table.Data.Column_Selector as Column_Selector_Module import Column_Selector, By_Index from Standard.Table.Data.Column_Type_Selection as Column_Type_Selection_Module import Column_Type_Selection, Auto from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter @@ -31,7 +30,6 @@ import Standard.Base.Data.Ordering.Comparator polyglot java import org.enso.table.data.table.Table as Java_Table polyglot java import org.enso.table.data.table.Column as Java_Column -polyglot java import org.enso.table.operations.OrderBuilder polyglot java import org.enso.table.format.csv.Writer as Csv_Writer polyglot java import org.enso.table.format.xlsx.Writer as Spreadsheet_Writer polyglot java import java.io.StringReader @@ -571,6 +569,44 @@ type Table descending order. table.order_by (Sort_Column_Selector.By_Index [Sort_Column.Index 1, Sort_Column.Index -7 Sort_Direction.Descending]) + > Example + Sorting the shop inventory based on the per-item price in ascending + order. 
+ + import Standard.Examples + + example_sort = Examples.inventory_table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "price"]) + + > Example + Sort the shop inventory based on the per-item price in descending order + + import Standard.Examples + + example_sort = + table = Examples.inventory_table + table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "price" Sort_Direction.Descending]) + + > Example + Sort the shop inventory based on the total stock, using the number sold + to break ties in descending order. + + import Standard.Examples + + example_sort = + table = Examples.inventory_table + table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "total_stock" Sort_Direction.Descending, Sort_Column.Name "sold_stock" Sort_Direction.Descending]) + + > Example + Sort the shop inventory in ascending order by the total stock, using + the number of items sold in descending order to break ties. + + import Standard.Examples + import Standard.Table + + example_sort = + table = Examples.inventory_table + table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "total_stock", Sort_Column.Name "sold_stock" Sort_Direction.Descending]) + order_by : Sort_Column_Selector -> Text_Ordering -> Problem_Behavior -> Table order_by (columns = (Sort_Column_Selector.By_Name [(Sort_Column.Name (this.columns.at 0 . name))])) text_ordering=Text_Ordering on_problems=Report_Warning = problem_builder = Problem_Builder.new @@ -861,156 +897,6 @@ type Table cols = this.columns here.new [["Column", cols.map .name], ["Items Count", cols.map .count], ["Storage Type", cols.map .storage_type]] . set_index "Column" - ## ALIAS Sort Table - UNSTABLE - - Sorts the table according to the specified rules. - - Arguments: - - by: Specifies the columns used for reordering the table. This argument - may be one of: - - a text: The text is treated as a column name. - - a column: Any column, that may or may not belong to this table. 
- Sorting by a column will result in reordering the rows of this - table in a way that would result in sorting the given column. - - an order rule: Specifies both the sorting column and additional - settings, that will take precedence over the global parameters of - this sort operation. The `column` field of the rule may be a text - or a column, with the semantics described above. - - a vector of any of the above: This will result in a hierarchical - sorting, such that the first rule is applied first, the second is - used for breaking ties, etc. - - order: Specifies the default sort order for this operation. All the - rules specified in the `by` argument will default to this setting, - unless specified in the rule. - - missing_last: Specifies the default placement of missing values when - compared to non-missing ones. This setting may be overriden by the - particular rules of the `by` argument. Note thet this argument is - independent from `order`, i.e. missing values will always be sorted - according to this rule, ignoring the ascending / descending setting. - - > Example - Sorting the shop inventory based on the per-item price in ascending - order. - - import Standard.Examples - - example_sort = Examples.inventory_table.sort by="price" - - > Example - Sort the shop inventory based on the per-item price in descending order - and placing missing values at the top of the table. - - import Standard.Examples - - example_sort = - table = Examples.inventory_table - table.sort by="price" order=Sort_Direction.Descending missing_last=false - - > Example - Sort the shop inventory based on the total stock, using the number sold - to break ties in descending order. - - import Standard.Examples - - example_sort = - table = Examples.inventory_table - table.sort by=["total_stock", "sold_stock"] order=Sort_Direction.Descending - - > Example - Sort the shop inventory in ascending order by the total stock, using - the number of items sold in descending order to break ties. 
- - import Standard.Examples - import Standard.Table - - example_sort = - table = Examples.inventory_table - sold_stock_rule = Table.Order_Rule "sold_stock" order=Sort_Direction.Descending - table.sort by=["total_stock", sold_stock_rule] - - > Example - Sorting the inventory in descending order based on the percentage of - the total stock sold, using the popularity of the product to break - ties. - - import Standard.Examples - import Standard.Table - - example_sort = - table = Examples.inventory_table.join Examples.popularity_table - percentage_sold = table.at "sold_stock" / table.at "total_stock" - table.sort by=[percentage_sold, "popularity"] order=Sort_Direction.Descending - - > Example - Sort the inventory by the price using a custom comparator function. - - import Standard.Examples - import Standard.Table - - example_sort = - table = Examples.inventory_table - comparator a b = a.compare_to b*2 - price_rule = Table.Order_Rule "price" comparator=comparator - table.sort by=price_rule - sort : Text | Column.Column | Order_Rule | Vector.Vector (Text | Column.Column | Order_Rule) -> Sort_Direction -> Boolean -> Table - sort by order=Sort_Direction.Ascending missing_last=True = Panic.recover Any <| - rules = this.build_java_order_rules by order missing_last - fallback_cmp = here.comparator_to_java .compare_to - mask = OrderBuilder.buildOrderMask rules.to_array fallback_cmp - new_table = this.java_table.applyMask mask - Table new_table - - ## PRIVATE - - Transforms order rules from Enso into Java. - - Arguments: - - rules: The rule(s) to convert. - - order: The sorting order. - - missing_last: Whether or not missing values should be ordered last. - build_java_order_rules : (Text | Column.Column. 
| Order_Rule | Vector (Text | Column.Column | Order_Rule)) -> Sort_Direction -> Boolean -> Vector - build_java_order_rules rules order missing_last = case rules of - Text -> [this.build_java_order_rule rules order missing_last] - Column.Column _ -> [this.build_java_order_rule rules order missing_last] - Order_Rule _ _ _ _ -> [this.build_java_order_rule rules order missing_last] - Vector.Vector _ -> rules.map (this.build_java_order_rule _ order missing_last) - - ## PRIVATE - - Builds a java order rule. - - Arguments: - - rule: The rule to convert. - - order: The sort order. - - missing_last: Whether or not missing values should be ordered last. - build_java_order_rule : (Text | Column.Column | Order_Rule) -> Sort_Direction -> Boolean -> OrderRule - build_java_order_rule rule order missing_last = - order_bool = case order of - Sort_Direction.Ascending -> True - Sort_Direction.Descending -> False - case rule of - Text -> - column = Panic.rethrow (this.at rule) - OrderBuilder.OrderRule.new column.java_column Nothing order_bool missing_last - Column.Column c -> - OrderBuilder.OrderRule.new c Nothing order_bool missing_last - Order_Rule col_ref cmp rule_order rule_nulls_last -> - c = case col_ref of - Text -> this.at col_ref . java_column - Column.Column c -> c - o = case rule_order of - Nothing -> order_bool - Sort_Direction.Ascending -> True - Sort_Direction.Descending -> False - nulls = case rule_nulls_last of - Nothing -> missing_last - _ -> rule_nulls_last - java_cmp = case cmp of - Nothing -> Nothing - c -> here.comparator_to_java c - OrderBuilder.OrderRule.new c java_cmp o nulls - ## UNSTABLE Concatenates `other` to `this`. 
diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Main.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Main.enso index 80a19a83b560..1562965578e5 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Main.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Main.enso @@ -8,7 +8,6 @@ import Standard.Table.Io.Spreadsheet import Standard.Table.Io.Spreadsheet_Write_Mode import Standard.Table.Data.Table import Standard.Table.Data.Column -import Standard.Table.Data.Order_Rule import Standard.Table.Model from Standard.Table.Io.Excel export Excel_Section, Excel_Range, read_excel @@ -21,7 +20,6 @@ export Standard.Table.Model export Standard.Table.Io.File_Read from Standard.Table.Data.Table export new, from_rows, join, concat, No_Such_Column_Error, Table -from Standard.Table.Data.Order_Rule export Order_Rule ## ALIAS To Table From 67b8c696819b95660e06a7eda2642cca6aec8a23 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Fri, 10 Jun 2022 12:04:58 +0100 Subject: [PATCH 08/23] Update a lot of `sort` to the new APIs. 
--- .../Database/0.0.0-dev/src/Data/Table.enso | 101 +++++------------- .../Standard/Database/0.0.0-dev/src/Main.enso | 2 - .../Searcher/0.0.0-dev/src/Data_Science.enso | 4 +- .../0.0.0-dev/src/Data_Science/Transform.enso | 7 +- test/Table_Tests/src/Aggregate_Spec.enso | 8 +- .../src/Database/Codegen_Spec.enso | 4 +- .../Table_Tests/src/Database/Common_Spec.enso | 2 +- test/Table_Tests/src/Table_Spec.enso | 2 +- 8 files changed, 40 insertions(+), 90 deletions(-) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso index bde5bb1317ac..5d690f7461d5 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso @@ -15,7 +15,6 @@ import Standard.Table.Internal.Aggregate_Column_Helper from Standard.Database.Data.Column as Column_Module import Column, Aggregate_Column_Builder from Standard.Database.Data.Internal.IR import Internal_Column from Standard.Table.Data.Table import No_Such_Column_Error -from Standard.Table.Data.Order_Rule as Order_Rule_Module import Order_Rule from Standard.Table.Data.Column_Selector as Column_Selector_Module import Column_Selector, By_Index from Standard.Base.Data.Text.Text_Ordering as Text_Ordering_Module import Text_Ordering from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter @@ -354,7 +353,7 @@ type Table Since this Table is backed by an SQL database, the Table returned by the `limit` method is deterministic only if the Table has been ordered (using - the `sort` method). + the `order_by` method). Otherwise, no order is imposed, so the returned Table will include at most `max_rows` rows, but there are no guarantees on which rows will be @@ -363,7 +362,7 @@ type Table Table is materialized. The limit is applied at the very end, so the new Table behaves exactly as - the old one, just limitting its results when being materialized. 
+ the old one, just limiting its results when being materialized. Specifically, applying further filters will still apply to the whole result set and the limit will be taken after applying these filters. @@ -371,7 +370,7 @@ type Table In the call below, assuming that the table of `t1` contains rows for numbers 1, 2, ..., 10, will return rows starting from 6 and not an empty result as one could expect if the limit was applied before the filters. - t1 = table.sort by='A' . limit 5 + t1 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "A"]) . limit 5 t2 = t1.where (t1.at 'A' > 5) t2.to_dataframe limit : Integer -> Table @@ -479,6 +478,28 @@ type Table descending order. table.order_by (Sort_Column_Selector.By_Index [Sort_Column.Index 1, Sort_Column.Index -7 Sort_Direction.Descending]) + > Example + Sorting `table` in ascending order by the value in column `'Quantity'`. + + table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'Quantity']) + + > Example + Sorting `table` in descending order by the value in column `'Quantity'`. + + table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'Quantity' Sort_Direction.Descending]) + + > Example + Sorting `table` in ascending order by the value in column `'Quantity'`, + using the value in column `'Rating'` for breaking ties. + + table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'Quantity', Sort_Column.Name 'Rating']) + + > Example + Sorting `table` in ascending order by the value in column `'Quantity'`, + using the value in column `'Rating'` in descending order for breaking + ties. + + table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'Quantity', Sort_Column.Name 'Rating' Sort_Direction.Descending]) order_by : Sort_Column_Selector -> Text_Ordering -> Problem_Behavior -> Table order_by (columns = (Sort_Column_Selector.By_Name [(Sort_Column.Name (this.columns.at 0 . 
name))])) text_ordering=Text_Ordering on_problems=Report_Warning = Panic.handle_wrapped_dataflow_error <| problem_builder = Problem_Builder.new @@ -495,78 +516,6 @@ type Table new_ctx = this.context.add_orders new_order_descriptors this.updated_context new_ctx - ## UNSTABLE - - Sorts the table according to the specified rules. - - Arguments: - - by: Specifies the columns used for reordering the table. This - argument may be one of: - - a text: The text is treated as a column name. - - a column: Any column, which is an expression computed from this - table. - - an order rule: Specifies both the sorting column and additional - settings, that will take precedence over the global parameters of - this sort operation. The `column` field of the rule may be a text - or a column, with the semantics described above. - - a vector of any of the above: This will result in a hierarchical - sorting, such that the first rule is applied first, the second is - used for breaking ties, etc. - - order: Specifies the default sort order for this operation. All the - rules specified in the `by` argument will default to this setting, - unless specified in the rule. - - missing_last: Specifies the default placement of missing values when - compared to non-missing ones. This setting may be overridden by the - particular rules of the `by` argument. Note thet this argument is - independent from `order`, i.e. missing values will always be sorted - according to this rule, ignoring the ascending / descending setting. - - > Example - Sorting `table` in ascending order by the value in column `'Quantity'` - table.sort by='Quantity' - - > Example - Sorting `table` in descending order by the value in column `'Quantity'`, - placing missing values at the top of the table. - table.sort by='Quantity' order=Sort_Direction.Descending missing_last=False - - > Example - Sorting `table` in ascending order by the value in column `'Quantity'`, - using the value in column `'Rating'` for breaking ties. 
- table.sort by=['Quantity', 'Rating'] - - > Example - Sorting `table` in ascending order by the value in column `'Quantity'`, - using the value in column `'Rating'` in descending order for breaking - ties. - table.sort by=['Quantity', Order_Rule 'Rating' (order=Sort_Direction.Descending)] - - > Example - Sorting `table` in ascending order by the value in an externally - computed column, using the value in column `'Rating'` for breaking - ties. - quality_ratio = table.at 'Rating' / table.at 'Price' - table.sort by=[quality_ratio, 'Rating'] - sort : Text | Column | Order_Rule | Vector.Vector (Text | Column | Order_Rule) -> Sort_Direction -> Boolean -> Table - sort by order=Sort_Direction.Ascending missing_last=True = Panic.recover Any <| - missing_to_ir last = case last of - True -> IR.Nulls_Last - False -> IR.Nulls_First - wrap_elem elem = - IR.Order_Descriptor (this.resolve elem . expression) order (missing_to_ir missing_last) collation=Nothing - to_ir elem = case elem of - Text -> wrap_elem elem - Column _ _ _ _ _ -> wrap_elem elem - Order_Rule elem Nothing my_order my_nulls -> - chosen_order = my_order.if_nothing order - chosen_nulls = my_nulls.if_nothing missing_last - IR.Order_Descriptor (this.resolve elem . expression) chosen_order (missing_to_ir chosen_nulls) collation=Nothing - Order_Rule _ _ _ _ -> - Error.throw <| Unsupported_Database_Operation_Error "Custom comparators are not supported in Database" - elems = Helpers.unify_vector_singleton by . map to_ir - new_ctx = this.context.set_orders elems - this.updated_context new_ctx - ## UNSTABLE Efficiently joins two tables based on either the index or a key column. 
diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Main.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Main.enso index 32b6fa192167..a276d4e2d1c2 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Main.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Main.enso @@ -10,6 +10,4 @@ export Standard.Database.Connection.Connection from Standard.Database.Connection.Database export all import Standard.Table.Data.Table -import Standard.Table.Data.Order_Rule from Standard.Table.Data.Table export No_Such_Column_Error -from Standard.Table.Data.Order_Rule export Order_Rule diff --git a/distribution/lib/Standard/Searcher/0.0.0-dev/src/Data_Science.enso b/distribution/lib/Standard/Searcher/0.0.0-dev/src/Data_Science.enso index 3e2cf4440174..743acc49190d 100644 --- a/distribution/lib/Standard/Searcher/0.0.0-dev/src/Data_Science.enso +++ b/distribution/lib/Standard/Searcher/0.0.0-dev/src/Data_Science.enso @@ -49,10 +49,12 @@ break ties in descending order. import Standard.Examples + import Standard.Table.Data.Sort_Column_Selector + import Standard.Table.Data.Sort_Column example_sort = table = Examples.inventory_table - table.sort by=["total_stock", "sold_stock"] order=Sort_Direction.Descending + table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "total_stock" Sort_Direction.Descending, Sort_Column.Name "sold_stock" Sort_Direction.Descending]) > Example Compute the number of transactions that each item has participated in, as diff --git a/distribution/lib/Standard/Searcher/0.0.0-dev/src/Data_Science/Transform.enso b/distribution/lib/Standard/Searcher/0.0.0-dev/src/Data_Science/Transform.enso index 94653376e564..6c502b14690c 100644 --- a/distribution/lib/Standard/Searcher/0.0.0-dev/src/Data_Science/Transform.enso +++ b/distribution/lib/Standard/Searcher/0.0.0-dev/src/Data_Science/Transform.enso @@ -24,14 +24,15 @@ example_map = Examples.integer_column.map (x -> x * x) > Example - Sort the shop inventory based on the per-item price in descending order and - 
placing missing values at the top of the table. + Sort the shop inventory based on the per-item price in descending order. import Standard.Examples + import Standard.Table.Data.Sort_Column_Selector + import Standard.Table.Data.Sort_Column example_sort = table = Examples.inventory_table - table.sort by="price" order=Sort_Direction.Descending missing_last=false + table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "price" Sort_Direction.Descending]) > Example Add two columns to each other. diff --git a/test/Table_Tests/src/Aggregate_Spec.enso b/test/Table_Tests/src/Aggregate_Spec.enso index 44af889f006e..3e69b2d65da9 100644 --- a/test/Table_Tests/src/Aggregate_Spec.enso +++ b/test/Table_Tests/src/Aggregate_Spec.enso @@ -830,7 +830,7 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te table = table_builder [["A", ["foo", "bar", "foo", "foo"]], ["B", ["a", "b", "c", "d"]]] result = table.aggregate [Group_By "A", (Concatenate "B" prefix="[[" suffix="]]" separator="; ")] result.row_count . should_equal 2 - materialized = materialize result . sort "A" + materialized = materialize result . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "A"]) materialized.columns.length . should_equal 2 materialized.columns.at 0 . name . should_equal "A" materialized.columns.at 0 . to_vector . should_equal ["bar", "foo"] @@ -910,14 +910,14 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te r1 = t5.aggregate [Group_By "G", Count_Distinct "A" (ignore_nothing=True)] r1.row_count . should_equal 2 - m1 = materialize r1 . sort "G" + m1 = materialize r1 . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "G"]) m1.columns.length . should_equal 2 m1.columns.first.to_vector . should_equal ["bar", "foo"] m1.columns.second.to_vector . should_equal [0, 1] r2 = t5.aggregate [Group_By "G", Count_Distinct "A" (ignore_nothing=False)] r2.row_count . should_equal 2 - m2 = materialize r2 . 
sort "G" + m2 = materialize r2 . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "G"]) m2.columns.length . should_equal 2 m2.columns.first.to_vector . should_equal ["bar", "foo"] m2.columns.second.to_vector . should_equal [1, 2] @@ -959,7 +959,7 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te r2 = table.aggregate [Group_By "G", Average "X"] r2.row_count.should_equal 2 - m2 = materialize r2 . sort "G" + m2 = materialize r2 . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "G"]) m2.columns.length . should_equal 2 m2.columns.first.to_vector . should_equal ["a", "b"] m2.columns.second.to_vector . should_equal [0.5, 1] diff --git a/test/Table_Tests/src/Database/Codegen_Spec.enso b/test/Table_Tests/src/Database/Codegen_Spec.enso index 1100142f6130..822c2ef57c7c 100644 --- a/test/Table_Tests/src/Database/Codegen_Spec.enso +++ b/test/Table_Tests/src/Database/Codegen_Spec.enso @@ -9,7 +9,7 @@ import Standard.Test from Standard.Table.Data.Aggregate_Column import all from Standard.Database import all from Standard.Database.Data.Sql import Sql_Type -from Standard.Table import No_Such_Column_Error, Order_Rule +from Standard.Table import No_Such_Column_Error from Standard.Database.Error as Database_Errors import Unsupported_Database_Operation_Error spec = @@ -163,7 +163,7 @@ spec = r1.to_sql.prepare . should_equal ['SELECT "T1"."C" AS "C" FROM "T1" AS "T1" ORDER BY ("T1"."A" + "T1"."B") ASC NULLS LAST', []] Test.specify 'should allow sorting with specific by-column rules' <| - r1 = t1.sort by=['A', (Order_Rule 'B' order=Sort_Direction.Descending)] + r1 = t1.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "A", Sort_Column.Name "B" Sort_Direction.Descending]) r1.to_sql.prepare . 
should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" ORDER BY "T1"."A" ASC NULLS LAST, "T1"."B" DESC NULLS LAST', []] Test.specify 'should return dataflow error when passed a non-existent column' <| diff --git a/test/Table_Tests/src/Database/Common_Spec.enso b/test/Table_Tests/src/Database/Common_Spec.enso index f2740a26feb2..6168ea41275c 100644 --- a/test/Table_Tests/src/Database/Common_Spec.enso +++ b/test/Table_Tests/src/Database/Common_Spec.enso @@ -245,7 +245,7 @@ spec prefix connection pending=Nothing = r_2.at 'id' . to_vector . should_equal [4,2,1,3,5,6] Test.specify 'should allow sorting with specific by-column rules' <| - r_1 = df.sort by=['quantity', (Order_Rule 'price' order=Sort_Direction.Descending)] + r_1 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "quantity", Sort_Column.Name "price" Sort_Direction.Descending]) r_1.at 'id' . to_vector . should_equal [4,2,3,1,6,5] Test.specify 'should return dataflow error when passed a non-existent column' <| diff --git a/test/Table_Tests/src/Table_Spec.enso b/test/Table_Tests/src/Table_Spec.enso index 44e31912c69c..909e3be4853a 100644 --- a/test/Table_Tests/src/Table_Spec.enso +++ b/test/Table_Tests/src/Table_Spec.enso @@ -359,7 +359,7 @@ spec = r_2.at 'Id' . to_vector . should_equal [4,2,1,3,5,6] Test.specify 'should allow sorting with specific by-column rules' <| - r_1 = df.sort by=['Quantity', (Order_Rule 'Price' order=Sort_Direction.Descending)] + r_1 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "Quantity", Sort_Column.Name "Price" Sort_Direction.Descending]) r_1.at 'Id' . to_vector . should_equal [4,2,3,1,6,5] Test.specify 'should respect defined comparison operations for custom types' <| From b7ee78cf78ba1975f18223346a86f1c35fc23bdf Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Fri, 10 Jun 2022 15:43:16 +0100 Subject: [PATCH 09/23] Remove rest of `sort`. Now to fix tests... 
--- test/Table_Tests/src/Aggregate_Spec.enso | 2 +- .../src/Database/Codegen_Spec.enso | 13 +++---- .../Table_Tests/src/Database/Common_Spec.enso | 34 ++++++------------- test/Table_Tests/src/Table_Spec.enso | 33 ++++-------------- 4 files changed, 22 insertions(+), 60 deletions(-) diff --git a/test/Table_Tests/src/Aggregate_Spec.enso b/test/Table_Tests/src/Aggregate_Spec.enso index 3e69b2d65da9..9fa97d421c56 100644 --- a/test/Table_Tests/src/Aggregate_Spec.enso +++ b/test/Table_Tests/src/Aggregate_Spec.enso @@ -1145,7 +1145,7 @@ aggregate_spec prefix table empty_table table_builder materialize is_database te table = table_builder [["A", [1, 1, 2, 1]], ["B", [3, 2, 2, 3]], ["C", [11, 12, 13, 14]]] grouped = table.aggregate [Group_By "B", Group_By "A"] grouped.row_count . should_equal 3 - materialized = materialize grouped . sort ["A", "B"] + materialized = materialize grouped . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "A", Sort_Column.Name "B"]) materialized.columns.length . should_equal 2 materialized.columns.at 1 . name . should_equal "A" materialized.columns.at 1 . to_vector . should_equal [1, 1, 2] diff --git a/test/Table_Tests/src/Database/Codegen_Spec.enso b/test/Table_Tests/src/Database/Codegen_Spec.enso index 822c2ef57c7c..ef585c63956e 100644 --- a/test/Table_Tests/src/Database/Codegen_Spec.enso +++ b/test/Table_Tests/src/Database/Codegen_Spec.enso @@ -147,27 +147,22 @@ spec = Test.group "[Codegen] Sorting" <| Test.specify "should allow sorting by a single column name" <| - r1 = t1.sort by="A" . at "B" + r1 = t1.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "A"]) . at "B" r1.to_sql.prepare . should_equal ['SELECT "T1"."B" AS "B" FROM "T1" AS "T1" ORDER BY "T1"."A" ASC NULLS LAST', []] - r2 = t1.sort by="B" missing_last=False order=Sort_Direction.Descending . at "A" + r2 = t1.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "B" Sort_Direction.Descending]) . at "A" r2.to_sql.prepare . 
should_equal ['SELECT "T1"."A" AS "A" FROM "T1" AS "T1" ORDER BY "T1"."B" DESC NULLS FIRST', []] Test.specify 'should allow sorting by multiple column names' <| - r1 = t1.sort by=['A', 'B'] + r1 = t1.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'A', Sort_Column.Name 'B']) r1.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" ORDER BY "T1"."A" ASC NULLS LAST, "T1"."B" ASC NULLS LAST', []] - Test.specify 'should allow sorting by expressions' <| - sum = t1.at 'A' + t1.at 'B' - r1 = t1.sort by=sum . at "C" - r1.to_sql.prepare . should_equal ['SELECT "T1"."C" AS "C" FROM "T1" AS "T1" ORDER BY ("T1"."A" + "T1"."B") ASC NULLS LAST', []] - Test.specify 'should allow sorting with specific by-column rules' <| r1 = t1.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "A", Sort_Column.Name "B" Sort_Direction.Descending]) r1.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" ORDER BY "T1"."A" ASC NULLS LAST, "T1"."B" DESC NULLS LAST', []] Test.specify 'should return dataflow error when passed a non-existent column' <| - r = t1.sort by='foobar' + r = t1.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'foobar']) r.should_fail_with No_Such_Column_Error Test.group "Helpers" <| diff --git a/test/Table_Tests/src/Database/Common_Spec.enso b/test/Table_Tests/src/Database/Common_Spec.enso index 6168ea41275c..38ee56d4da7f 100644 --- a/test/Table_Tests/src/Database/Common_Spec.enso +++ b/test/Table_Tests/src/Database/Common_Spec.enso @@ -128,16 +128,16 @@ spec prefix connection pending=Nothing = the Dataframes library, so it is independent of the library under testing here. Test.specify "should allow joining tables index-on-index" <| - r_1 = a.set_index 'x' . join (b.set_index 'w') . to_dataframe . sort by=['y', 'z'] + r_1 = a.set_index 'x' . join (b.set_index 'w') . to_dataframe . 
order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'y', Sort_Column.Name 'z']) r_1.at 'y' . to_vector . should_equal ['bar', 'baz', 'eggs', 'foo', 'spam', 'spam', 'spam'] r_1.at 'z' . to_vector . should_equal [Nothing, Nothing, 'foo', Nothing, 'bar', 'eggs', 'foo'] - r_2 = a.set_index 'y' . join (b.set_index 'z') drop_unmatched=True . to_dataframe . sort by=['x', 'w'] + r_2 = a.set_index 'y' . join (b.set_index 'z') drop_unmatched=True . to_dataframe . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'x', Sort_Column.Name 'w']) r_2.at 'x' . to_vector . should_equal [0, 0, 1, 1, 3, 6] r_2.at 'w' . to_vector . should_equal [3, 6, 3, 5, 5, 3] Test.specify "should allow joining tables column-on-index" <| - r_1 = a.join (b.set_index 'w') on='x' . to_dataframe . sort by=['y', 'z'] + r_1 = a.join (b.set_index 'w') on='x' . to_dataframe . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'y', Sort_Column.Name 'z']) r_1.at 'y' . to_vector . should_equal ['bar', 'baz', 'eggs', 'foo', 'spam', 'spam', 'spam'] r_1.at 'z' . to_vector . should_equal [Nothing, Nothing, 'foo', Nothing, 'bar', 'eggs', 'foo'] r_2 = a.join (b.set_index 'z') drop_unmatched=True on='y' . to_dataframe . sort by=['x', 'w'] @@ -145,7 +145,7 @@ spec prefix connection pending=Nothing = r_2.at 'w' . to_vector . should_equal [3, 6, 3, 5, 5, 3] Test.specify "should allow self-joins and append suffixes to disambiguate column names" <| - r_1 = a.join (a.set_index 'x') on='x' . to_dataframe . sort by='x' + r_1 = a.join (a.set_index 'x') on='x' . to_dataframe . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'x']) r_1.columns.map .name . should_equal ['x', 'y_left', 'y_right'] r_1.at 'x' . to_vector . should_equal [0, 1, 3, 6, 7] expected_y = ['foo', 'bar', 'spam', 'eggs', 'baz'] @@ -163,7 +163,7 @@ spec prefix connection pending=Nothing = tb_2 = tb.set_index "id" res = (tc.join ta_2 on="id_a") . 
join tb_2 on="id_b" left_suffix="_a" right_suffix="_b" sel = res.select_columns (By_Name ["name_a", "name_b"]) - df = sel.to_dataframe . sort by="name_a" + df = sel.to_dataframe . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "name_a"]) df . at "name_a" . to_vector . should_equal ["Foo", "Hmm"] df . at "name_b" . to_vector . should_equal ["Bar", "Hmm"] @@ -219,37 +219,25 @@ spec prefix connection pending=Nothing = Materialized_Table.new [["id", [1,2,3,4,5,6]], ["name", ["shoes","trousers","dress","skirt","blouse","t-shirt"]], ["quantity", [20,10,20,10,30,30]], ["rating", [3.0,Nothing,7.3,3.0,2.2,Nothing]], ["price", [37.2,42.1,64.1,87.4,13.5,64.2]]] Test.specify "should allow sorting by a single column name" <| - r_1 = df.sort by="quantity" + r_1 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'quantity']) r_1.at 'id' . to_vector . should_equal [2,4,1,3,5,6] - r_2 = df.sort by="rating" missing_last=False - r_2.at 'id' . to_vector . should_equal [2,6,5,1,4,3] - - r_3 = df.sort by="rating" missing_last=False order=Sort_Direction.Descending + r_3 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'rating' Sort_Direction.Descending]) r_3.at 'id' . to_vector . should_equal [2,6,3,1,4,5] Test.specify 'should allow sorting by multiple column names' <| r_1 = df.sort by=['quantity', 'rating'] r_1.at 'id' . to_vector . should_equal [4,2,1,3,5,6] - r_2 = df.sort by=['rating', 'quantity'] missing_last=False order=Sort_Direction.Descending + r_2 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'rating' Sort_Direction.Descending, Sort_Column.Name 'quantity' Sort_Direction.Descending]) r_2.at 'id' . to_vector . should_equal [6,2,3,1,4,5] - Test.specify 'should allow sorting by external columns' <| - quality_ratio = df.at 'rating' / df.at 'price' - - r_1 = df.sort by=quality_ratio - r_1.at 'id' . to_vector . should_equal [4,1,3,5,2,6] - - r_2 = df.sort by=['quantity', quality_ratio] - r_2.at 'id' . to_vector . 
should_equal [4,2,1,3,5,6] - Test.specify 'should allow sorting with specific by-column rules' <| r_1 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "quantity", Sort_Column.Name "price" Sort_Direction.Descending]) r_1.at 'id' . to_vector . should_equal [4,2,3,1,6,5] Test.specify 'should return dataflow error when passed a non-existent column' <| - r = df.sort by='foobar' + r = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'foobar']) r.should_fail_with No_Such_Column_Error Test.specify 'should correctly reorder all kinds of columns and leave the original columns untouched' <| @@ -259,7 +247,7 @@ spec prefix connection pending=Nothing = texts = ["foo", "foo", "bar", "baz", "spam"] df = upload "T8" <| Materialized_Table.new [["ord", [0,3,2,4,1]], ["ints", ints], ["reals", reals], ["bools", bools], ["texts", texts]] - r = df.sort by='ord' + r = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'ord']) r.at 'ints' . to_vector . should_equal [1, 5, 3, 2, 4] df.at 'ints' . to_vector . should_equal ints @@ -328,7 +316,7 @@ spec prefix connection pending=Nothing = (InMemory) table are ordered according to a specified column or list of columns. determinize_by order_column table = - table.sort by=order_column + table.order_by (Sort_Column_Selector.By_Column [Sort_Column.Column order_column]) Test.specify "should allow counting group sizes and elements" <| aggregates = [Count Nothing, Count_Not_Nothing "price", Count_Nothing "price"] diff --git a/test/Table_Tests/src/Table_Spec.enso b/test/Table_Tests/src/Table_Spec.enso index 909e3be4853a..6e56e8f1fdee 100644 --- a/test/Table_Tests/src/Table_Spec.enso +++ b/test/Table_Tests/src/Table_Spec.enso @@ -333,31 +333,19 @@ spec = df = (Enso_Project.data / "clothes.csv").read Test.specify "should allow sorting by a single column name" <| - r_1 = df.sort by="Quantity" + r_1 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "Quantity"]) r_1.at 'Id' . to_vector . 
should_equal [2,4,1,3,5,6] - r_2 = df.sort by="Rating" missing_last=False - r_2.at 'Id' . to_vector . should_equal [2,6,5,1,4,3] - - r_3 = df.sort by="Rating" missing_last=False order=Sort_Direction.Descending + r_3 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "Rating" Sort_Direction.Descending]) r_3.at 'Id' . to_vector . should_equal [2,6,3,1,4,5] Test.specify 'should allow sorting by multiple column names' <| - r_1 = df.sort by=['Quantity', 'Rating'] + r_1 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'Quantity', Sort_Column.Name 'Rating']) r_1.at 'Id' . to_vector . should_equal [4,2,1,3,5,6] - r_2 = df.sort by=['Rating', 'Quantity'] missing_last=False order=Sort_Direction.Descending + r_2 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'Rating' Sort_Direction.Descending, Sort_Column.Name 'Quantity' Sort_Direction.Descending]) r_2.at 'Id' . to_vector . should_equal [6,2,3,1,4,5] - Test.specify 'should allow sorting by external columns' <| - quality_ratio = df.at 'Rating' / df.at 'Price' - - r_1 = df.sort by=quality_ratio - r_1.at 'Id' . to_vector . should_equal [4,1,3,5,2,6] - - r_2 = df.sort by=['Quantity', quality_ratio] - r_2.at 'Id' . to_vector . should_equal [4,2,1,3,5,6] - Test.specify 'should allow sorting with specific by-column rules' <| r_1 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "Quantity", Sort_Column.Name "Price" Sort_Direction.Descending]) r_1.at 'Id' . to_vector . should_equal [4,2,3,1,6,5] @@ -366,20 +354,11 @@ spec = c_1 = ['id', [1, 2, 3, 4, 5, 6]] c_2 = ['val', [My 1 2, My 3 4, My 2 1, My 5 2, My 7 0, My 4 -1]] df = Table.new [c_1, c_2] - r = df.sort by='val' - r.at 'id' . to_vector . should_equal [1,3,6,2,4,5] - - Test.specify 'should allow passing a custom comparator per column and should missing-proof it' <| - c_1 = ['id', [1, 2, 3, 4, 5, 6]] - c_2 = ['val', [My 1 2, My 2 5, My 3 4, My 6 3, Nothing, My 1 0]] - df = Table.new [c_1, c_2] - - cmp a b = (a.x-a.y).abs . 
compare_to (b.x-b.y).abs - r = df.sort by=(Order_Rule 'val' comparator=cmp) + r = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'val']) r.at 'id' . to_vector . should_equal [1,3,6,2,4,5] Test.specify 'should return dataflow error when passed a non-existent column' <| - r = df.sort by='foobar' + r = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'foobar']) r.should_fail_with No_Such_Column_Error Test.specify 'should correctly reorder all kinds of columns and leave the original columns untouched' <| From a45a3a4cf8746a1070c61d0c48cc7cef13dbf7e0 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Fri, 10 Jun 2022 16:10:25 +0100 Subject: [PATCH 10/23] Some repairs... --- .../lib/Standard/Table/0.0.0-dev/src/Data/Table.enso | 1 + test/Table_Tests/src/Database/Codegen_Spec.enso | 2 ++ test/Table_Tests/src/Database/Common_Spec.enso | 6 ++++-- test/Table_Tests/src/Table_Spec.enso | 6 ++++-- test/Tests/src/Examples_Spec.enso | 3 --- 5 files changed, 11 insertions(+), 7 deletions(-) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso index 67d6ef4611cb..ff80778b2114 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso @@ -32,6 +32,7 @@ polyglot java import org.enso.table.data.table.Table as Java_Table polyglot java import org.enso.table.data.table.Column as Java_Column polyglot java import org.enso.table.format.csv.Writer as Csv_Writer polyglot java import org.enso.table.format.xlsx.Writer as Spreadsheet_Writer +polyglot java import org.enso.table.operations.OrderBuilder polyglot java import java.io.StringReader ## Creates a new table from a vector of `[name, items]` pairs. 
diff --git a/test/Table_Tests/src/Database/Codegen_Spec.enso b/test/Table_Tests/src/Database/Codegen_Spec.enso index ef585c63956e..e4faf5935e19 100644 --- a/test/Table_Tests/src/Database/Codegen_Spec.enso +++ b/test/Table_Tests/src/Database/Codegen_Spec.enso @@ -1,5 +1,7 @@ from Standard.Base import all from Standard.Table.Data.Column_Selector as Column_Selector_Module import By_Name +import Standard.Table.Data.Sort_Column_Selector +import Standard.Table.Data.Sort_Column import project.Database.Helpers.Fake_Test_Connection import Standard.Database.Data.Dialect diff --git a/test/Table_Tests/src/Database/Common_Spec.enso b/test/Table_Tests/src/Database/Common_Spec.enso index 38ee56d4da7f..f53bf5060f19 100644 --- a/test/Table_Tests/src/Database/Common_Spec.enso +++ b/test/Table_Tests/src/Database/Common_Spec.enso @@ -4,6 +4,8 @@ from Standard.Table.Data.Column_Selector as Column_Selector_Module import By_Nam from Standard.Database import all import Standard.Table.Data.Table as Materialized_Table +import Standard.Table.Data.Sort_Column_Selector +import Standard.Table.Data.Sort_Column import Standard.Test import project.Database.Helpers.Name_Generator @@ -223,11 +225,11 @@ spec prefix connection pending=Nothing = r_1.at 'id' . to_vector . should_equal [2,4,1,3,5,6] r_3 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'rating' Sort_Direction.Descending]) - r_3.at 'id' . to_vector . should_equal [2,6,3,1,4,5] + r_3.at 'id' . to_vector . should_equal [3,1,4,5,2,6] Test.specify 'should allow sorting by multiple column names' <| r_1 = df.sort by=['quantity', 'rating'] - r_1.at 'id' . to_vector . should_equal [4,2,1,3,5,6] + r_1.at 'id' . to_vector . should_equal [2,4,1,3,6,5] r_2 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'rating' Sort_Direction.Descending, Sort_Column.Name 'quantity' Sort_Direction.Descending]) r_2.at 'id' . to_vector . 
should_equal [6,2,3,1,4,5] diff --git a/test/Table_Tests/src/Table_Spec.enso b/test/Table_Tests/src/Table_Spec.enso index 6e56e8f1fdee..9556bfc35ac0 100644 --- a/test/Table_Tests/src/Table_Spec.enso +++ b/test/Table_Tests/src/Table_Spec.enso @@ -1,5 +1,7 @@ from Standard.Base import all from Standard.Table import all +import Standard.Table.Data.Sort_Column_Selector +import Standard.Table.Data.Sort_Column from Standard.Table.Data.Table as Table_Internal import Empty_Error @@ -337,11 +339,11 @@ spec = r_1.at 'Id' . to_vector . should_equal [2,4,1,3,5,6] r_3 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "Rating" Sort_Direction.Descending]) - r_3.at 'Id' . to_vector . should_equal [2,6,3,1,4,5] + r_3.at 'Id' . to_vector . should_equal [3,1,4,5,2,6] Test.specify 'should allow sorting by multiple column names' <| r_1 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'Quantity', Sort_Column.Name 'Rating']) - r_1.at 'Id' . to_vector . should_equal [4,2,1,3,5,6] + r_1.at 'Id' . to_vector . should_equal [2,4,1,3,6,5] r_2 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'Rating' Sort_Direction.Descending, Sort_Column.Name 'Quantity' Sort_Direction.Descending]) r_2.at 'Id' . to_vector . should_equal [6,2,3,1,4,5] diff --git a/test/Tests/src/Examples_Spec.enso b/test/Tests/src/Examples_Spec.enso index 89724e901aac..4ca74f59765c 100644 --- a/test/Tests/src/Examples_Spec.enso +++ b/test/Tests/src/Examples_Spec.enso @@ -112,9 +112,6 @@ spec = Test.group "Examples" <| Examples.text_column_1 Examples.text_column_2 - Test.specify "should provide an aggregate column" <| - Examples.aggregate_column - Test.specify "should provide various example tables" <| Examples.inventory_table Examples.popularity_table From 094144a1563ae8f9a1988ef9c3030c93d873d822 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Fri, 10 Jun 2022 17:37:27 +0100 Subject: [PATCH 11/23] More fixes ... 
--- .../Standard/Database/0.0.0-dev/src/Data/Column.enso | 8 +++++--- test/Table_Tests/src/Database/Codegen_Spec.enso | 8 ++++---- test/Table_Tests/src/Database/Common_Spec.enso | 11 ++++------- test/Table_Tests/src/Table_Spec.enso | 4 ++-- 4 files changed, 15 insertions(+), 16 deletions(-) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso index a64fccf30684..1e8fd4afb348 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso @@ -4,6 +4,8 @@ import Standard.Database.Data.Internal.Helpers import Standard.Database.Data.Internal.IR import Standard.Database.Data.Table import Standard.Table.Data.Column as Materialized_Column +import Standard.Table.Data.Sort_Column_Selector +import Standard.Table.Data.Sort_Column from Standard.Database.Data.Sql import Sql_Type from Standard.Database.Data.Table import Integrity_Error @@ -461,9 +463,9 @@ type Column Sorting `column` in descending order, placing missing values at the top of the resulting column. column.sort order=Sort_Direction.Descending missing_last=False - sort : Sort_Direction -> Boolean -> (Any -> Any -> Ordering) | Nothing -> Column - sort order=Sort_Direction.Ascending missing_last=True = - this.to_table.sort by=this order=order missing_last=missing_last . at this.name + sort : Sort_Direction -> Column + sort order=Sort_Direction.Ascending = + this.to_table.order_by (Sort_Column_Selector.By_Column [Sort_Column.Column this order]) . 
at this.name ## UNSTABLE diff --git a/test/Table_Tests/src/Database/Codegen_Spec.enso b/test/Table_Tests/src/Database/Codegen_Spec.enso index e4faf5935e19..18ba8c62261a 100644 --- a/test/Table_Tests/src/Database/Codegen_Spec.enso +++ b/test/Table_Tests/src/Database/Codegen_Spec.enso @@ -150,18 +150,18 @@ spec = Test.group "[Codegen] Sorting" <| Test.specify "should allow sorting by a single column name" <| r1 = t1.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "A"]) . at "B" - r1.to_sql.prepare . should_equal ['SELECT "T1"."B" AS "B" FROM "T1" AS "T1" ORDER BY "T1"."A" ASC NULLS LAST', []] + r1.to_sql.prepare . should_equal ['SELECT "T1"."B" AS "B" FROM "T1" AS "T1" ORDER BY "T1"."A" ASC', []] r2 = t1.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "B" Sort_Direction.Descending]) . at "A" - r2.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A" FROM "T1" AS "T1" ORDER BY "T1"."B" DESC NULLS FIRST', []] + r2.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A" FROM "T1" AS "T1" ORDER BY "T1"."B" DESC', []] Test.specify 'should allow sorting by multiple column names' <| r1 = t1.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'A', Sort_Column.Name 'B']) - r1.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" ORDER BY "T1"."A" ASC NULLS LAST, "T1"."B" ASC NULLS LAST', []] + r1.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" ORDER BY "T1"."A" ASC, "T1"."B" ASC', []] Test.specify 'should allow sorting with specific by-column rules' <| r1 = t1.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "A", Sort_Column.Name "B" Sort_Direction.Descending]) - r1.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" ORDER BY "T1"."A" ASC NULLS LAST, "T1"."B" DESC NULLS LAST', []] + r1.to_sql.prepare . 
should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" ORDER BY "T1"."A" ASC, "T1"."B" DESC', []] Test.specify 'should return dataflow error when passed a non-existent column' <| r = t1.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'foobar']) diff --git a/test/Table_Tests/src/Database/Common_Spec.enso b/test/Table_Tests/src/Database/Common_Spec.enso index f53bf5060f19..b2eea410d1ab 100644 --- a/test/Table_Tests/src/Database/Common_Spec.enso +++ b/test/Table_Tests/src/Database/Common_Spec.enso @@ -142,7 +142,7 @@ spec prefix connection pending=Nothing = r_1 = a.join (b.set_index 'w') on='x' . to_dataframe . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'y', Sort_Column.Name 'z']) r_1.at 'y' . to_vector . should_equal ['bar', 'baz', 'eggs', 'foo', 'spam', 'spam', 'spam'] r_1.at 'z' . to_vector . should_equal [Nothing, Nothing, 'foo', Nothing, 'bar', 'eggs', 'foo'] - r_2 = a.join (b.set_index 'z') drop_unmatched=True on='y' . to_dataframe . sort by=['x', 'w'] + r_2 = a.join (b.set_index 'z') drop_unmatched=True on='y' . to_dataframe . order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'x', Sort_Column.Name 'w']) r_2.at 'x' . to_vector . should_equal [0, 0, 1, 1, 3, 6] r_2.at 'w' . to_vector . should_equal [3, 6, 3, 5, 5, 3] @@ -228,11 +228,11 @@ spec prefix connection pending=Nothing = r_3.at 'id' . to_vector . should_equal [3,1,4,5,2,6] Test.specify 'should allow sorting by multiple column names' <| - r_1 = df.sort by=['quantity', 'rating'] + r_1 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'quantity', Sort_Column.Name 'rating']) r_1.at 'id' . to_vector . should_equal [2,4,1,3,6,5] r_2 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'rating' Sort_Direction.Descending, Sort_Column.Name 'quantity' Sort_Direction.Descending]) - r_2.at 'id' . to_vector . should_equal [6,2,3,1,4,5] + r_2.at 'id' . to_vector . 
should_equal [3,1,4,5,6,2] Test.specify 'should allow sorting with specific by-column rules' <| r_1 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "quantity", Sort_Column.Name "price" Sort_Direction.Descending]) @@ -267,14 +267,11 @@ spec prefix connection pending=Nothing = c = df.at 'rating' r_1 = c.sort - r_1.to_vector.should_equal [2.2, 3.0, 3.0, 7.3, Nothing, Nothing] + r_1.to_vector.should_equal [Nothing, Nothing, 2.2, 3.0, 3.0, 7.3] r_2 = c.sort order=Sort_Direction.Descending r_2.to_vector.should_equal [7.3, 3.0, 3.0, 2.2, Nothing, Nothing] - r_3 = c.sort order=Sort_Direction.Descending missing_last=False - r_3.to_vector.should_equal [Nothing, Nothing, 7.3, 3.0, 3.0, 2.2] - Test.group prefix+"Index" pending=pending <| t0 = upload "Tix" <| Materialized_Table.new [["ix", [1,2,3]], ["c1", [4,5,6]]] diff --git a/test/Table_Tests/src/Table_Spec.enso b/test/Table_Tests/src/Table_Spec.enso index 9556bfc35ac0..4761759556d0 100644 --- a/test/Table_Tests/src/Table_Spec.enso +++ b/test/Table_Tests/src/Table_Spec.enso @@ -346,7 +346,7 @@ spec = r_1.at 'Id' . to_vector . should_equal [2,4,1,3,6,5] r_2 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'Rating' Sort_Direction.Descending, Sort_Column.Name 'Quantity' Sort_Direction.Descending]) - r_2.at 'Id' . to_vector . should_equal [6,2,3,1,4,5] + r_2.at 'Id' . to_vector . should_equal [3,1,4,5,6,2] Test.specify 'should allow sorting with specific by-column rules' <| r_1 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "Quantity", Sort_Column.Name "Price" Sort_Direction.Descending]) @@ -372,7 +372,7 @@ spec = objs = [Cons 1 2, Cons 2 3, Cons 6 7, Cons 8 9, Cons 10 30] df = Table.new [['ord', ord], ['ints', ints], ['reals', reals], ['bools', bools], ['texts', texts], ['objs', objs]] - r = df.sort by='ord' + r = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'ord']) r.at 'ints' . to_vector . should_equal [1, 5, 3, 2, 4] df.at 'ints' . to_vector . 
should_equal ints From 61ac8ec07f3f2f13ec341fa79f6e121b897f13b7 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Fri, 10 Jun 2022 18:11:47 +0100 Subject: [PATCH 12/23] Repair `column.sort` for Database --- .../Database/0.0.0-dev/src/Data/Column.enso | 6 ++--- .../Database/0.0.0-dev/src/Data/Table.enso | 26 ++++++++++++++++++- .../Table/0.0.0-dev/src/Data/Table.enso | 12 --------- 3 files changed, 27 insertions(+), 17 deletions(-) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso index 1e8fd4afb348..d0b02efc5a48 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso @@ -460,9 +460,8 @@ type Column column.sort > Example - Sorting `column` in descending order, placing missing values at the - top of the resulting column. - column.sort order=Sort_Direction.Descending missing_last=False + Sorting `column` in descending order. + column.sort order=Sort_Direction.Descending sort : Sort_Direction -> Column sort order=Sort_Direction.Ascending = this.to_table.order_by (Sort_Column_Selector.By_Column [Sort_Column.Column this order]) . at this.name @@ -639,4 +638,3 @@ lift_aggregate new_name connection expected_type expr context = new_ixes = cols.second new_ctx = IR.subquery_as_ctx subquery . 
set_index new_ixes Column new_name connection new_col.sql_type new_col.expression new_ctx - diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso index 5d690f7461d5..5eed397f31d2 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso @@ -609,7 +609,31 @@ type Table Table new_table_name this.connection new_columns new_ctx - ## Prototype Group By function + ## ALIAS group, summarize + + Aggregates the rows in a table using any `Group_By` entries in columns. + The columns argument specifies which additional aggregations to perform and to return. + + Arguments: + - columns: Vector of `Aggregate_Column` specifying the aggregated table. + - on_problems: Specifies how to handle problems if they occur, reporting + them as warnings by default. + + The following problems can occur: + - If a column name is not in the input table, a `Missing_Input_Columns`. + - If a column index is out of range, a `Column_Indexes_Out_Of_Range`. + - If there are no valid columns in the output table, a `No_Output_Columns`. + - If there are invalid column names in the output table, an `Invalid_Output_Column_Names`. + - If there are duplicate column names in the output table, a `Duplicate_Output_Column_Names`. + - If grouping on or computing the `Mode` on a floating point number, a `Floating_Point_Grouping`. + - If an aggregation fails, an `Invalid_Aggregation_Method`. + - If, when concatenating values, there is an unquoted delimiter, an `Unquoted_Delimiter`. + - If there are more than 10 issues with a single column, an `Additional_Warnings`.
+ + > Example + Group by the Key column, count the rows + + table.aggregate [Group_By "Key", Count Nothing] aggregate : [Aggregate_Column] -> Problem_Behavior -> Table aggregate columns (on_problems=Report_Warning) = validated = Aggregate_Column_Helper.prepare_aggregate_columns columns this diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso index ff80778b2114..02ab062dd0b6 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso @@ -496,7 +496,6 @@ type Table new_names = this.columns.map mapper this.take_end (this.length - 1) . rename_columns (Column_Mapping.By_Position new_names) on_problems=on_problems - ## ALIAS group, summarize Aggregates the rows in a table using any `Group_By` entries in columns. @@ -1328,17 +1327,6 @@ print_table header rows indices_count format_term = " " + y ([" " + header_line, divider] + row_lines).join '\n' -## PRIVATE - - Wraps the Enso comparator function so it's usable in Java. - - Arguments: - - cmp: The Enso comparator function. - - x: The left operand to the comparator. - - y: The right operand to the comparator. -comparator_to_java : (Any -> Any -> Ordering) -> Any -> Any -> Integer -comparator_to_java cmp x y = cmp x y . to_sign - Table.from (that : Text) (format:File_Format.Delimited|File_Format.Fixed_Width = File_Format.Delimited '\t') (on_problems:Problem_Behavior=Report_Warning) = java_reader = StringReader.new that Delimited_Reader.read_from_reader format java_reader on_problems From 39d7937a55180200ac6f4c6b56010ed78f6b19b8 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Fri, 10 Jun 2022 18:19:32 +0100 Subject: [PATCH 13/23] Just 1 more issue to resolve... 
--- .../lib/Standard/Table/0.0.0-dev/src/Data/Column.enso | 5 +++-- test/Table_Tests/src/Database/Common_Spec.enso | 4 +--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso index 7397dd967392..31a46dc0242d 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso @@ -996,14 +996,15 @@ type Column Examples.decimal_column.sort comparator=my_comparator sort : Sort_Direction -> Boolean -> (Any -> Any -> Ordering) | Nothing -> Column sort order=Sort_Direction.Ascending missing_last=True comparator=Nothing = + comparator_to_java cmp x y = cmp x y . to_sign order_bool = case order of Sort_Direction.Ascending -> True Sort_Direction.Descending -> False java_cmp = case comparator of Nothing -> Nothing - cmp -> Table.comparator_to_java cmp + cmp -> comparator_to_java cmp rule = OrderBuilder.OrderRule.new this.java_column java_cmp order_bool missing_last - fallback_cmp = Table.comparator_to_java .compare_to + fallback_cmp = comparator_to_java .compare_to mask = OrderBuilder.buildOrderMask [rule].to_array fallback_cmp new_col = this.java_column.applyMask mask Column new_col diff --git a/test/Table_Tests/src/Database/Common_Spec.enso b/test/Table_Tests/src/Database/Common_Spec.enso index b2eea410d1ab..f4500d337a11 100644 --- a/test/Table_Tests/src/Database/Common_Spec.enso +++ b/test/Table_Tests/src/Database/Common_Spec.enso @@ -278,8 +278,6 @@ spec prefix connection pending=Nothing = t = t0.set_index 'ix' Test.specify "should be accessible by `at` like other columns" <| t.at 'ix' . to_vector . should_equal t.index.to_vector - Test.specify "should be accessible by `select_columns` like other columns" <| - t.select_columns (By_Name ['ix']) . columns . first . to_vector . 
should_equal t.index.to_vector Test.specify "treated as a column indexed by itself should still correctly compute values" <| col = t.index+10 vec = [11, 12, 13] @@ -315,7 +313,7 @@ spec prefix connection pending=Nothing = (InMemory) table are ordered according to a specified column or list of columns. determinize_by order_column table = - table.order_by (Sort_Column_Selector.By_Column [Sort_Column.Column order_column]) + table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name order_column]) Test.specify "should allow counting group sizes and elements" <| aggregates = [Count Nothing, Count_Not_Nothing "price", Count_Nothing "price"] From ff6d7984dbe40a69b9c251216fd236f04abdb394 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Mon, 13 Jun 2022 09:58:42 +0100 Subject: [PATCH 14/23] Separate Examples tests from Tests. --- test/Example_Tests/README.md | 2 ++ test/Example_Tests/package.yaml | 7 +++++++ test/{Tests => Example_Tests}/src/Examples_Spec.enso | 0 test/Example_Tests/src/Main.enso | 8 ++++++++ test/Tests/src/Main.enso | 2 -- 5 files changed, 17 insertions(+), 2 deletions(-) create mode 100644 test/Example_Tests/README.md create mode 100644 test/Example_Tests/package.yaml rename test/{Tests => Example_Tests}/src/Examples_Spec.enso (100%) create mode 100644 test/Example_Tests/src/Main.enso diff --git a/test/Example_Tests/README.md b/test/Example_Tests/README.md new file mode 100644 index 000000000000..8b2827397bcb --- /dev/null +++ b/test/Example_Tests/README.md @@ -0,0 +1,2 @@ +This is a set of tests for the `Examples` library for Enso. 
+ diff --git a/test/Example_Tests/package.yaml b/test/Example_Tests/package.yaml new file mode 100644 index 000000000000..4e8c3968b7b1 --- /dev/null +++ b/test/Example_Tests/package.yaml @@ -0,0 +1,7 @@ +name: Tests +namespace: enso_dev +enso-version: default +version: 0.0.1 +license: MIT +author: enso-dev@enso.org +maintainer: enso-dev@enso.org diff --git a/test/Tests/src/Examples_Spec.enso b/test/Example_Tests/src/Examples_Spec.enso similarity index 100% rename from test/Tests/src/Examples_Spec.enso rename to test/Example_Tests/src/Examples_Spec.enso diff --git a/test/Example_Tests/src/Main.enso b/test/Example_Tests/src/Main.enso new file mode 100644 index 000000000000..d696586cc802 --- /dev/null +++ b/test/Example_Tests/src/Main.enso @@ -0,0 +1,8 @@ +from Standard.Base import all + +import Standard.Test + +import project.Examples_Spec + +main = Test.Suite.run_main <| + Examples_Spec.spec diff --git a/test/Tests/src/Main.enso b/test/Tests/src/Main.enso index 1fc32761354c..43252d1f5c8d 100644 --- a/test/Tests/src/Main.enso +++ b/test/Tests/src/Main.enso @@ -60,8 +60,6 @@ import project.System.File_Spec import project.System.Process_Spec import project.System.Reporting_Stream_Decoder_Spec -import project.Examples_Spec - main = Test.Suite.run_main <| Any_Spec.spec Array_Spec.spec From 9be4abde4701214df805cc03e8e5866f35619943 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Mon, 13 Jun 2022 11:16:54 +0100 Subject: [PATCH 15/23] Update last few tests. 
--- .../lib/Standard/Database/0.0.0-dev/src/Main.enso | 1 - .../src/Internal/Aggregate_Column_Helper.enso | 1 - test/Table_Tests/src/Database/Codegen_Spec.enso | 13 +++++++++---- test/Table_Tests/src/Database/Common_Spec.enso | 11 ++++++++--- test/Table_Tests/src/Table_Spec.enso | 11 +++++++---- 5 files changed, 24 insertions(+), 13 deletions(-) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Main.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Main.enso index a276d4e2d1c2..9e30c69542b7 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Main.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Main.enso @@ -10,4 +10,3 @@ export Standard.Database.Connection.Connection from Standard.Database.Connection.Database export all import Standard.Table.Data.Table -from Standard.Table.Data.Table export No_Such_Column_Error diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso index ea4e59eee68e..0223cda69385 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Aggregate_Column_Helper.enso @@ -1,6 +1,5 @@ from Standard.Base import all -from Standard.Table.Data.Table as Table_Module import No_Such_Column_Error from Standard.Table.Data.Column as Column_Module import Column from Standard.Table.Data.Aggregate_Column import all from Standard.Table.Data.Column_Selector as Column_Selector_Module import Column_Selector, By_Name, By_Index, By_Column diff --git a/test/Table_Tests/src/Database/Codegen_Spec.enso b/test/Table_Tests/src/Database/Codegen_Spec.enso index 18ba8c62261a..b9e2c3b77b4c 100644 --- a/test/Table_Tests/src/Database/Codegen_Spec.enso +++ b/test/Table_Tests/src/Database/Codegen_Spec.enso @@ -7,11 +7,13 @@ import project.Database.Helpers.Fake_Test_Connection import Standard.Database.Data.Dialect import 
Standard.Database.Data.Table as Table_Module import Standard.Test +import Standard.Test.Problems from Standard.Table.Data.Aggregate_Column import all from Standard.Database import all from Standard.Database.Data.Sql import Sql_Type from Standard.Table import No_Such_Column_Error +from Standard.Table.Error as Table_Errors import No_Input_Columns_Selected, Missing_Input_Columns from Standard.Database.Error as Database_Errors import Unsupported_Database_Operation_Error spec = @@ -50,7 +52,7 @@ spec = t3 = t2.set "bar" foo t3.to_sql.prepare . should_equal ['SELECT "T1"."C" AS "C", "T1"."B" AS "B", "T1"."A" AS "bar" FROM "T1" AS "T1"', []] - Test.specify "should fail if at is called for a nonexisting column" <| + Test.specify "should fail if at is called for a non-existent column" <| t1.at "undefined" . should_fail_with No_Such_Column_Error Test.specify "should allow to limit the amount of returned results" <| @@ -163,9 +165,12 @@ spec = r1 = t1.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "A", Sort_Column.Name "B" Sort_Direction.Descending]) r1.to_sql.prepare . should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1" ORDER BY "T1"."A" ASC, "T1"."B" DESC', []] - Test.specify 'should return dataflow error when passed a non-existent column' <| - r = t1.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'foobar']) - r.should_fail_with No_Such_Column_Error + Test.specify 'should return warnings and errors when passed a non-existent column' <| + action = t1.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'foobar']) on_problems=_ + tester table = + table.to_sql.prepare . 
should_equal ['SELECT "T1"."A" AS "A", "T1"."B" AS "B", "T1"."C" AS "C" FROM "T1" AS "T1"', []] + problems = [Missing_Input_Columns [Sort_Column.Name 'foobar'], No_Input_Columns_Selected] + Problems.test_problem_handling action problems tester Test.group "Helpers" <| Test.specify "combine_names should combine lists of names" <| diff --git a/test/Table_Tests/src/Database/Common_Spec.enso b/test/Table_Tests/src/Database/Common_Spec.enso index f4500d337a11..ed25a84d9a99 100644 --- a/test/Table_Tests/src/Database/Common_Spec.enso +++ b/test/Table_Tests/src/Database/Common_Spec.enso @@ -7,6 +7,8 @@ import Standard.Table.Data.Table as Materialized_Table import Standard.Table.Data.Sort_Column_Selector import Standard.Table.Data.Sort_Column import Standard.Test +import Standard.Test.Problems +from Standard.Table.Error as Table_Errors import No_Input_Columns_Selected, Missing_Input_Columns import project.Database.Helpers.Name_Generator from Standard.Table.Data.Aggregate_Column import all @@ -238,9 +240,12 @@ spec prefix connection pending=Nothing = r_1 = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "quantity", Sort_Column.Name "price" Sort_Direction.Descending]) r_1.at 'id' . to_vector . should_equal [4,2,3,1,6,5] - Test.specify 'should return dataflow error when passed a non-existent column' <| - r = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'foobar']) - r.should_fail_with No_Such_Column_Error + Test.specify 'should return warnings and errors when passed a non-existent column' <| + action = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'foobar']) on_problems=_ + tester table = + table.at 'id' . to_vector . 
should_equal [1,2,3,4,5,6] + problems = [Missing_Input_Columns [Sort_Column.Name 'foobar'], No_Input_Columns_Selected] + Problems.test_problem_handling action problems tester Test.specify 'should correctly reorder all kinds of columns and leave the original columns untouched' <| ints = [1, 2, 3, 4, 5] diff --git a/test/Table_Tests/src/Table_Spec.enso b/test/Table_Tests/src/Table_Spec.enso index 4761759556d0..feb720fb5287 100644 --- a/test/Table_Tests/src/Table_Spec.enso +++ b/test/Table_Tests/src/Table_Spec.enso @@ -10,7 +10,7 @@ import Standard.Table.Data.Storage import Standard.Test import Standard.Test.Problems import Standard.Visualization -from Standard.Table.Error as Table_Errors import Invalid_Output_Column_Names, Duplicate_Output_Column_Names +from Standard.Table.Error as Table_Errors import Invalid_Output_Column_Names, Duplicate_Output_Column_Names, No_Input_Columns_Selected, Missing_Input_Columns import project.Common_Table_Spec @@ -359,9 +359,12 @@ spec = r = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'val']) r.at 'id' . to_vector . should_equal [1,3,6,2,4,5] - Test.specify 'should return dataflow error when passed a non-existent column' <| - r = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'foobar']) - r.should_fail_with No_Such_Column_Error + Test.specify 'should return warnings and errors when passed a non-existent column' <| + action = df.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'foobar']) on_problems=_ + tester table = + table.at 'Id' . to_vector . 
should_equal [1,2,3,4,5,6] + problems = [Missing_Input_Columns [Sort_Column.Name 'foobar'], No_Input_Columns_Selected] + Problems.test_problem_handling action problems tester Test.specify 'should correctly reorder all kinds of columns and leave the original columns untouched' <| ord = [0, 3, 2, 4, 1] From a1078f452e93d46882a64a1c9f4cb10d3140d232 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Mon, 13 Jun 2022 11:34:48 +0100 Subject: [PATCH 16/23] Change log. --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 65709ded6ccf..e8ca6c4c0919 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -139,6 +139,7 @@ API and added builders for customizing less common settings.][3516] - [Allow control of sort direction in `First` and `Last` aggregations.][3517] - [Implemented `Text.write`, replacing `File.write_text`.][3518] +- [Removed `select`, `group`, `sort` and releated type from tables.][3519] [debug-shortcuts]: https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug @@ -219,6 +220,7 @@ [3516]: https://github.com/enso-org/enso/pull/3516 [3517]: https://github.com/enso-org/enso/pull/3517 [3518]: https://github.com/enso-org/enso/pull/3518 +[3519]: https://github.com/enso-org/enso/pull/3519 #### Enso Compiler From 2b7bedd435c8980ada114cae048691d5424022d7 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Mon, 13 Jun 2022 12:00:46 +0100 Subject: [PATCH 17/23] Remove a blank line. --- test/Example_Tests/README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/test/Example_Tests/README.md b/test/Example_Tests/README.md index 8b2827397bcb..bf731920bf64 100644 --- a/test/Example_Tests/README.md +++ b/test/Example_Tests/README.md @@ -1,2 +1 @@ This is a set of tests for the `Examples` library for Enso. - From 4d7f6d598004fda4b18160da041a04d303af74cc Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Mon, 13 Jun 2022 13:25:00 +0100 Subject: [PATCH 18/23] PR comment. 
--- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e8ca6c4c0919..b482bf1a9537 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -139,7 +139,8 @@ API and added builders for customizing less common settings.][3516] - [Allow control of sort direction in `First` and `Last` aggregations.][3517] - [Implemented `Text.write`, replacing `File.write_text`.][3518] -- [Removed `select`, `group`, `sort` and releated type from tables.][3519] +- [Removed obsolete `select`, `group`, `sort` and related types from tables.] + [3519] [debug-shortcuts]: https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug From 52b1b74e52eb3e9836bf44478db78b438b921c39 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Mon, 13 Jun 2022 13:28:20 +0100 Subject: [PATCH 19/23] Update legacy build and rename project folder. --- .github/workflows/scala.yml | 4 ++++ test/{Example_Tests => Examples_Tests}/README.md | 0 test/{Example_Tests => Examples_Tests}/package.yaml | 0 test/{Example_Tests => Examples_Tests}/src/Examples_Spec.enso | 0 test/{Example_Tests => Examples_Tests}/src/Main.enso | 0 5 files changed, 4 insertions(+) rename test/{Example_Tests => Examples_Tests}/README.md (100%) rename test/{Example_Tests => Examples_Tests}/package.yaml (100%) rename test/{Example_Tests => Examples_Tests}/src/Examples_Spec.enso (100%) rename test/{Example_Tests => Examples_Tests}/src/Main.enso (100%) diff --git a/.github/workflows/scala.yml b/.github/workflows/scala.yml index 0dc78837f868..bd6045293020 100644 --- a/.github/workflows/scala.yml +++ b/.github/workflows/scala.yml @@ -286,6 +286,7 @@ jobs: $ENGINE_DIST_DIR/bin/enso --no-ir-caches --run test/Geo_Tests $ENGINE_DIST_DIR/bin/enso --no-ir-caches --run test/Visualization_Tests $ENGINE_DIST_DIR/bin/enso --no-ir-caches --run test/Image_Tests + $ENGINE_DIST_DIR/bin/enso --no-ir-caches --run test/Examples_Tests - name: Compile the Standard Libraries (Unix) shell: 
bash @@ -311,6 +312,7 @@ jobs: $ENGINE_DIST_DIR/bin/enso --ir-caches --run test/Geo_Tests $ENGINE_DIST_DIR/bin/enso --ir-caches --run test/Visualization_Tests $ENGINE_DIST_DIR/bin/enso --ir-caches --run test/Image_Tests + $ENGINE_DIST_DIR/bin/enso --ir-caches --run test/Examples_Tests - name: Test Engine Distribution Without Caches (Windows) shell: bash @@ -321,6 +323,7 @@ jobs: $ENGINE_DIST_DIR/bin/enso.bat --no-ir-caches --run test/Geo_Tests $ENGINE_DIST_DIR/bin/enso.bat --no-ir-caches --run test/Visualization_Tests $ENGINE_DIST_DIR/bin/enso.bat --no-ir-caches --run test/Image_Tests + $ENGINE_DIST_DIR/bin/enso.bat --no-ir-caches --run test/Examples_Tests - name: Compile the Standard Libraries (Windows) shell: bash @@ -346,6 +349,7 @@ jobs: $ENGINE_DIST_DIR/bin/enso.bat --ir-caches --run test/Geo_Tests $ENGINE_DIST_DIR/bin/enso.bat --ir-caches --run test/Visualization_Tests $ENGINE_DIST_DIR/bin/enso.bat --ir-caches --run test/Image_Tests + $ENGINE_DIST_DIR/bin/enso.bat --ir-caches --run test/Examples_Tests # Publish - name: Compress the built artifacts for upload diff --git a/test/Example_Tests/README.md b/test/Examples_Tests/README.md similarity index 100% rename from test/Example_Tests/README.md rename to test/Examples_Tests/README.md diff --git a/test/Example_Tests/package.yaml b/test/Examples_Tests/package.yaml similarity index 100% rename from test/Example_Tests/package.yaml rename to test/Examples_Tests/package.yaml diff --git a/test/Example_Tests/src/Examples_Spec.enso b/test/Examples_Tests/src/Examples_Spec.enso similarity index 100% rename from test/Example_Tests/src/Examples_Spec.enso rename to test/Examples_Tests/src/Examples_Spec.enso diff --git a/test/Example_Tests/src/Main.enso b/test/Examples_Tests/src/Main.enso similarity index 100% rename from test/Example_Tests/src/Main.enso rename to test/Examples_Tests/src/Main.enso From bdc1bab3fc7c997a7211d7548c3c30f38ad6b0cf Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Mon, 13 Jun 2022 15:31:42 +0100 
Subject: [PATCH 20/23] Sort imports. --- distribution/lib/Standard/Table/0.0.0-dev/package.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/distribution/lib/Standard/Table/0.0.0-dev/package.yaml b/distribution/lib/Standard/Table/0.0.0-dev/package.yaml index 9d9858ef5cfa..04f37e22703c 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/package.yaml +++ b/distribution/lib/Standard/Table/0.0.0-dev/package.yaml @@ -29,10 +29,10 @@ component-groups: - Standard.Base.Join: exports: - Standard.Table.Data.Table.Table.join - - Standard.Table.Data.Table.Table.group + - Standard.Table.Data.Table.Table.aggregate - Standard.Base.Transform: exports: - - Standard.Table.Data.Table.Table.sort + - Standard.Table.Data.Table.Table.order_by - Standard.Table.Data.Table.Table.to_csv - Standard.Table.Data.Column.Column.to_table - Standard.Base.Output: From 24293bf983d805e7a9acfda70ce5c12165c9bf4a Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Tue, 14 Jun 2022 08:47:12 +0100 Subject: [PATCH 21/23] Fix component list. 
--- distribution/lib/Standard/Database/0.0.0-dev/package.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/distribution/lib/Standard/Database/0.0.0-dev/package.yaml b/distribution/lib/Standard/Database/0.0.0-dev/package.yaml index 30b6ddfeec6c..e034c09d539b 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/package.yaml +++ b/distribution/lib/Standard/Database/0.0.0-dev/package.yaml @@ -24,8 +24,8 @@ component-groups: - Standard.Base.Join: exports: - Standard.Database.Data.Table.Table.join - - Standard.Database.Data.Table.Table.group + - Standard.Database.Data.Table.Table.aggregate - Standard.Base.Transform: exports: - - Standard.Database.Data.Table.Table.sort + - Standard.Database.Data.Table.Table.order_by - Standard.Database.Data.Column.Column.to_table From e3c324c697e161eb79a756bdd89861065c26c408 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Tue, 14 Jun 2022 10:29:38 +0100 Subject: [PATCH 22/23] Remove old test. --- test/Tests/src/Main.enso | 1 - 1 file changed, 1 deletion(-) diff --git a/test/Tests/src/Main.enso b/test/Tests/src/Main.enso index 43252d1f5c8d..bc9e8af8f923 100644 --- a/test/Tests/src/Main.enso +++ b/test/Tests/src/Main.enso @@ -68,7 +68,6 @@ main = Test.Suite.run_main <| Conversion_Spec.spec Deep_Export_Spec.spec Error_Spec.spec - Examples_Spec.spec File_Spec.spec Reporting_Stream_Decoder_Spec.spec Http_Header_Spec.spec From b57f4c4cf19714e6a2e56c8f577e7032dd8f0009 Mon Sep 17 00:00:00 2001 From: James Dunkerley Date: Tue, 14 Jun 2022 11:43:10 +0100 Subject: [PATCH 23/23] Remove legacy tests. 
--- test/Visualization_Tests/src/Table_Spec.enso | 30 ++------------------ 1 file changed, 3 insertions(+), 27 deletions(-) diff --git a/test/Visualization_Tests/src/Table_Spec.enso b/test/Visualization_Tests/src/Table_Spec.enso index d566b8c899fb..2251e52eed19 100644 --- a/test/Visualization_Tests/src/Table_Spec.enso +++ b/test/Visualization_Tests/src/Table_Spec.enso @@ -3,6 +3,7 @@ from Standard.Base import all from Standard.Database import all import Standard.Database.Data.Table as Database_Table import Standard.Table.Data.Table as Dataframe_Table +from Standard.Table.Data.Aggregate_Column import Group_By, Average import Standard.Visualization.Table.Visualization as Visualization import Standard.Test @@ -47,19 +48,9 @@ visualization_spec connection = json = make_json header=["A"] data=[['a', 'a']] all_rows=3 ixes_header=[] ixes=[] vis . should_equal json - g = t.group by=["A", "B"] . at "C" . mean + g = t.aggregate [Group_By "A", Group_By "B", Average "C"] . at "Average C" vis2 = Visualization.prepare_visualization g 1 - json2 = make_json header=["C_mean"] data=[[4]] all_rows=2 ixes_header=["A", "B"] ixes=[['a'], [2]] - vis2 . should_equal json2 - - Test.specify "should visualize database aggregates" <| - agg = t.group by="A" - vis = Visualization.prepare_visualization agg 1 - json = make_json header=["B", "C"] data=[[2], [3]] all_rows=3 ixes_header=["A"] ixes=[['a']] - vis . should_equal json - - vis2 = Visualization.prepare_visualization (agg.at "C") 1 - json2 = make_json header=["C"] data=[[3]] all_rows=3 ixes_header=["A"] ixes=[['a']] + json2 = make_json header=["Average C"] data=[[4.0]] all_rows=2 ixes_header=[] ixes=[] vis2 . should_equal json2 t2 = Dataframe_Table.new [["A", [1, 2, 3]], ["B", [4, 5, 6]], ["C", [7, 8, 9]]] @@ -78,21 +69,6 @@ visualization_spec connection = json = make_json header=["A"] data=[[1, 2]] all_rows=3 ixes_header=[""] ixes=[[0, 1]] vis . should_equal json - g = t2.group by="A" . at "C" . 
mean - vis2 = Visualization.prepare_visualization g 1 - json2 = make_json header=["C_mean"] data=[[7.0]] all_rows=3 ixes_header=["A"] ixes=[[1]] - vis2 . should_equal json2 - - Test.specify "should visualize dataframe aggregates" <| - agg = t2.group by="A" - vis = Visualization.prepare_visualization agg 1 - json = make_json header=["B", "C"] data=[[4], [7]] all_rows=3 ixes_header=["A"] ixes=[[1]] - vis . should_equal json - - vis2 = Visualization.prepare_visualization (agg.at "C") 1 - json2 = make_json header=["C"] data=[[7]] all_rows=3 ixes_header=["A"] ixes=[[1]] - vis2 . should_equal json2 - Test.specify "should handle Vectors" <| vis = Visualization.prepare_visualization [1, 2, 3] 2