Skip to content

Commit

Permalink
Removing old functions and tidy up of Table types (#3519)
Browse files Browse the repository at this point in the history
- Removed `select` method.
- Removed `group` method.
- Removed `Aggregate_Table` type.
- Removed `Order_Rule` type.
- Removed `sort` method from Table.
- Expanded comments on `order_by`.
- Update comment on `aggregate` on Database.
- Update Visualisation to use new APIs.
- Updated Data Science examples to use new APIs.
- Moved Examples test out of Tests to own test.

# Important Notes
Need to get Examples_Tests added to CI.
  • Loading branch information
jdunkerley authored and kazcw committed Jun 29, 2022
1 parent 0cb8af4 commit fa0e974
Show file tree
Hide file tree
Showing 30 changed files with 218 additions and 844 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/scala.yml
Original file line number Diff line number Diff line change
Expand Up @@ -286,6 +286,7 @@ jobs:
$ENGINE_DIST_DIR/bin/enso --no-ir-caches --run test/Geo_Tests
$ENGINE_DIST_DIR/bin/enso --no-ir-caches --run test/Visualization_Tests
$ENGINE_DIST_DIR/bin/enso --no-ir-caches --run test/Image_Tests
$ENGINE_DIST_DIR/bin/enso --no-ir-caches --run test/Examples_Tests
- name: Compile the Standard Libraries (Unix)
shell: bash
Expand All @@ -311,6 +312,7 @@ jobs:
$ENGINE_DIST_DIR/bin/enso --ir-caches --run test/Geo_Tests
$ENGINE_DIST_DIR/bin/enso --ir-caches --run test/Visualization_Tests
$ENGINE_DIST_DIR/bin/enso --ir-caches --run test/Image_Tests
$ENGINE_DIST_DIR/bin/enso --ir-caches --run test/Examples_Tests
- name: Test Engine Distribution Without Caches (Windows)
shell: bash
Expand All @@ -321,6 +323,7 @@ jobs:
$ENGINE_DIST_DIR/bin/enso.bat --no-ir-caches --run test/Geo_Tests
$ENGINE_DIST_DIR/bin/enso.bat --no-ir-caches --run test/Visualization_Tests
$ENGINE_DIST_DIR/bin/enso.bat --no-ir-caches --run test/Image_Tests
$ENGINE_DIST_DIR/bin/enso.bat --no-ir-caches --run test/Examples_Tests
- name: Compile the Standard Libraries (Windows)
shell: bash
Expand All @@ -346,6 +349,7 @@ jobs:
$ENGINE_DIST_DIR/bin/enso.bat --ir-caches --run test/Geo_Tests
$ENGINE_DIST_DIR/bin/enso.bat --ir-caches --run test/Visualization_Tests
$ENGINE_DIST_DIR/bin/enso.bat --ir-caches --run test/Image_Tests
$ENGINE_DIST_DIR/bin/enso.bat --ir-caches --run test/Examples_Tests
# Publish
- name: Compress the built artifacts for upload
Expand Down
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,8 @@
API and added builders for customizing less common settings.][3516]
- [Allow control of sort direction in `First` and `Last` aggregations.][3517]
- [Implemented `Text.write`, replacing `File.write_text`.][3518]
- [Removed obsolete `select`, `group`, `sort` and related types from tables.]
[3519]

[debug-shortcuts]:
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
Expand Down Expand Up @@ -219,6 +221,7 @@
[3516]: https://github.com/enso-org/enso/pull/3516
[3517]: https://github.com/enso-org/enso/pull/3517
[3518]: https://github.com/enso-org/enso/pull/3518
[3519]: https://github.com/enso-org/enso/pull/3519

#### Enso Compiler

Expand Down
4 changes: 2 additions & 2 deletions distribution/lib/Standard/Database/0.0.0-dev/package.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ component-groups:
- Standard.Base.Join:
exports:
- Standard.Database.Data.Table.Table.join
- Standard.Database.Data.Table.Table.group
- Standard.Database.Data.Table.Table.aggregate
- Standard.Base.Transform:
exports:
- Standard.Database.Data.Table.Table.sort
- Standard.Database.Data.Table.Table.order_by
- Standard.Database.Data.Column.Column.to_table
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ import Standard.Database.Data.Internal.Helpers
import Standard.Database.Data.Internal.IR
import Standard.Database.Data.Table
import Standard.Table.Data.Column as Materialized_Column
import Standard.Table.Data.Sort_Column_Selector
import Standard.Table.Data.Sort_Column

from Standard.Database.Data.Sql import Sql_Type
from Standard.Database.Data.Table import Integrity_Error
Expand Down Expand Up @@ -458,12 +460,11 @@ type Column
column.sort

> Example
Sorting `column` in descending order, placing missing values at the
top of the resulting column.
column.sort order=Sort_Direction.Descending missing_last=False
sort : Sort_Direction -> Boolean -> (Any -> Any -> Ordering) | Nothing -> Column
sort order=Sort_Direction.Ascending missing_last=True =
this.to_table.sort by=this order=order missing_last=missing_last . at this.name
Sorting `column` in descending order.
column.sort order=Sort_Direction.Descending
sort : Sort_Direction -> Column
sort order=Sort_Direction.Ascending =
this.to_table.order_by (Sort_Column_Selector.By_Column [Sort_Column.Column this order]) . at this.name

## UNSTABLE

Expand Down Expand Up @@ -637,4 +638,3 @@ lift_aggregate new_name connection expected_type expr context =
new_ixes = cols.second
new_ctx = IR.subquery_as_ctx subquery . set_index new_ixes
Column new_name connection new_col.sql_type new_col.expression new_ctx

219 changes: 47 additions & 172 deletions distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ import Standard.Table.Internal.Aggregate_Column_Helper
from Standard.Database.Data.Column as Column_Module import Column, Aggregate_Column_Builder
from Standard.Database.Data.Internal.IR import Internal_Column
from Standard.Table.Data.Table import No_Such_Column_Error
from Standard.Table.Data.Order_Rule as Order_Rule_Module import Order_Rule
from Standard.Table.Data.Column_Selector as Column_Selector_Module import Column_Selector, By_Index
from Standard.Base.Data.Text.Text_Ordering as Text_Ordering_Module import Text_Ordering
from Standard.Table.Data.Data_Formatter as Data_Formatter_Module import Data_Formatter
Expand Down Expand Up @@ -356,7 +355,7 @@ type Table

Since this Table is backed by an SQL database, the Table returned by the
`limit` method is deterministic only if the Table has been ordered (using
the `sort` method).
the `order_by` method).

Otherwise, no order is imposed, so the returned Table will include at most
`max_rows` rows, but there are no guarantees on which rows will be
Expand All @@ -365,15 +364,15 @@ type Table
Table is materialized.

The limit is applied at the very end, so the new Table behaves exactly as
the old one, just limitting its results when being materialized.
the old one, just limiting its results when being materialized.
Specifically, applying further filters will still apply to the whole
result set and the limit will be taken after applying these filters.

> For example:
In the call below, assuming that the table of `t1` contains rows for
numbers 1, 2, ..., 10, will return rows starting from 6 and not an empty
result as one could expect if the limit was applied before the filters.
t1 = table.sort by='A' . limit 5
t1 = table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name "A"]) . limit 5
t2 = t1.where (t1.at 'A' > 5)
t2.to_dataframe
limit : Integer -> Table
Expand Down Expand Up @@ -481,6 +480,28 @@ type Table
descending order.

table.order_by (Sort_Column_Selector.By_Index [Sort_Column.Index 1, Sort_Column.Index -7 Sort_Direction.Descending])
> Example
Sorting `table` in ascending order by the value in column `'Quantity'`.

table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'Quantity'])

> Example
Sorting `table` in descending order by the value in column `'Quantity'`.

table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'Quantity' Sort_Direction.Descending])

> Example
Sorting `table` in ascending order by the value in column `'Quantity'`,
using the value in column `'Rating'` for breaking ties.

table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'Quantity', Sort_Column.Name 'Rating'])

> Example
Sorting `table` in ascending order by the value in column `'Quantity'`,
using the value in column `'Rating'` in descending order for breaking
ties.

table.order_by (Sort_Column_Selector.By_Name [Sort_Column.Name 'Quantity', Sort_Column.Name 'Rating' Sort_Direction.Descending])
order_by : Sort_Column_Selector -> Text_Ordering -> Problem_Behavior -> Table
order_by (columns = (Sort_Column_Selector.By_Name [(Sort_Column.Name (this.columns.at 0 . name))])) text_ordering=Text_Ordering on_problems=Report_Warning = Panic.handle_wrapped_dataflow_error <|
problem_builder = Problem_Builder.new
Expand All @@ -497,91 +518,6 @@ type Table
new_ctx = this.context.add_orders new_order_descriptors
this.updated_context new_ctx

## UNSTABLE

Sorts the table according to the specified rules.

Arguments:
- by: Specifies the columns used for reordering the table. This
argument may be one of:
- a text: The text is treated as a column name.
- a column: Any column, which is an expression computed from this
table.
- an order rule: Specifies both the sorting column and additional
settings, that will take precedence over the global parameters of
this sort operation. The `column` field of the rule may be a text
or a column, with the semantics described above.
- a vector of any of the above: This will result in a hierarchical
sorting, such that the first rule is applied first, the second is
used for breaking ties, etc.
- order: Specifies the default sort order for this operation. All the
rules specified in the `by` argument will default to this setting,
unless specified in the rule.
- missing_last: Specifies the default placement of missing values when
compared to non-missing ones. This setting may be overridden by the
particular rules of the `by` argument. Note that this argument is
independent from `order`, i.e. missing values will always be sorted
according to this rule, ignoring the ascending / descending setting.

> Example
Sorting `table` in ascending order by the value in column `'Quantity'`
table.sort by='Quantity'

> Example
Sorting `table` in descending order by the value in column `'Quantity'`,
placing missing values at the top of the table.
table.sort by='Quantity' order=Sort_Direction.Descending missing_last=False

> Example
Sorting `table` in ascending order by the value in column `'Quantity'`,
using the value in column `'Rating'` for breaking ties.
table.sort by=['Quantity', 'Rating']

> Example
Sorting `table` in ascending order by the value in column `'Quantity'`,
using the value in column `'Rating'` in descending order for breaking
ties.
table.sort by=['Quantity', Order_Rule 'Rating' (order=Sort_Direction.Descending)]

> Example
Sorting `table` in ascending order by the value in an externally
computed column, using the value in column `'Rating'` for breaking
ties.
quality_ratio = table.at 'Rating' / table.at 'Price'
table.sort by=[quality_ratio, 'Rating']
sort : Text | Column | Order_Rule | Vector.Vector (Text | Column | Order_Rule) -> Sort_Direction -> Boolean -> Table
sort by order=Sort_Direction.Ascending missing_last=True = Panic.recover Any <|
missing_to_ir last = case last of
True -> IR.Nulls_Last
False -> IR.Nulls_First
wrap_elem elem =
IR.Order_Descriptor (this.resolve elem . expression) order (missing_to_ir missing_last) collation=Nothing
to_ir elem = case elem of
Text -> wrap_elem elem
Column _ _ _ _ _ -> wrap_elem elem
Order_Rule elem Nothing my_order my_nulls ->
chosen_order = my_order.if_nothing order
chosen_nulls = my_nulls.if_nothing missing_last
IR.Order_Descriptor (this.resolve elem . expression) chosen_order (missing_to_ir chosen_nulls) collation=Nothing
Order_Rule _ _ _ _ ->
Error.throw <| Unsupported_Database_Operation_Error "Custom comparators are not supported in Database"
elems = Helpers.unify_vector_singleton by . map to_ir
new_ctx = this.context.set_orders elems
this.updated_context new_ctx

## UNSTABLE

Selects a subset of columns from this table by name.

Arguments:
- columns: The names of the columns to select from the table.
select : Vector Text -> Table
select columns =
candidates = this.internal_columns + this.context.meta_index
find_col name = candidates.find (p -> p.name == name)
selected_cols = columns.map (find_col >> .catch) . filter (c -> c.is_nothing.not)
this.updated_columns selected_cols

## UNSTABLE

Efficiently joins two tables based on either the index or a key column.
Expand Down Expand Up @@ -675,28 +611,31 @@ type Table

Table new_table_name this.connection new_columns new_ctx

## UNSTABLE
## ALIAS group, summarize

Returns an aggregate table resulting from grouping the elements by the
value of the specified column.
Aggregates the rows in a table using any `Group_By` entries in columns.
The columns argument specifies which additional aggregations to perform and to return.

Arguments:
- by: The column names on which to group. If this is not set, the index
will be used for grouping instead.
group : Vector Text | Text | Nothing -> Aggregate_Table
group by=Nothing = Panic.recover Any <|
cols = case by of
Nothing ->
if this.context.meta_index.is_empty then Panic.throw <| Illegal_State_Error "Trying to group by an empty index." else
this.context.meta_index
_ ->

Helpers.unify_vector_singleton by . map (this.resolve >> .as_internal)
exprs = cols.map .expression
new_ctx = this.context.set_groups exprs . set_index cols
Aggregate_Table this.name this.connection this.internal_columns new_ctx

## Prototype Group By function
- columns: Vector of `Aggregate_Column` specifying the aggregated table.
- on_problems: Specifies how to handle problems if they occur, reporting
them as warnings by default.

The following problems can occur:
- If a column name is not in the input table, a `Missing_Input_Columns`.
- If a column index is out of range, a `Column_Indexes_Out_Of_Range`.
- If there are no valid columns in the output table, a `No_Output_Columns`.
- If there are invalid column names in the output table, an `Invalid_Output_Column_Names`.
- If there are duplicate column names in the output table, a `Duplicate_Output_Column_Names`.
- If grouping on or computing the `Mode` on a floating point number, a `Floating_Point_Grouping`.
- If an aggregation fails, an `Invalid_Aggregation_Method`.
- If, when concatenating values, there is an unquoted delimiter, an `Unquoted_Delimiter`.
- If there are more than 10 issues with a single column, an `Additional_Warnings`.

> Example
Group by the Key column, count the rows

table.aggregate [Group_By "Key", Count Nothing]
aggregate : [Aggregate_Column] -> Problem_Behavior -> Table
aggregate columns (on_problems=Report_Warning) =
validated = Aggregate_Column_Helper.prepare_aggregate_columns columns this
Expand Down Expand Up @@ -980,70 +919,6 @@ type Table
# TODO This should ideally be done in a streaming manner, or at least respect the row limits.
this.to_dataframe.write path format on_existing_file column_mapping on_problems

## Represents a table with grouped rows.
type Aggregate_Table

## UNSTABLE

Represents a table with grouped rows.

Arguments:
- name: The name of the table.
- connection: The connection with which the table is associated.
- internal_columns: The internal representation of the table columns.
- context: The context associated with this table.
# type Aggregate_Table (name : Text) (connection : Connection)
# (internal_columns : Vector [Text, IR.Expression])
# (context : IR.Context)
type Aggregate_Table name connection internal_columns context

## UNSTABLE

Returns a vector of aggregate columns in this table.
columns : Vector.Vector
columns = this.internal_columns . map this.make_column

## UNSTABLE

Returns a column containing the number of elements in each group.
count : Column
count =
expr = IR.Operation "COUNT_ROWS" []
# new_name connection expected_type expr context
Column_Module.lift_aggregate "count" this.connection Sql.Sql_Type.integer expr this.context

## UNSTABLE

Returns an aggregate column with the given name, contained in this table.

Arguments:
- name: The name of the aggregate column to get from the aggregate table.
at : Text -> Column ! No_Such_Column_Error
at name =
internal = this.internal_columns.find (p -> p.name == name)
this.make_column internal . map_error (_ -> No_Such_Column_Error name)

## PRIVATE

Helper to create aggregate columns from internal columns.

Arguments:
- internal: The internal column to make into an aggregate column.
make_column : Internal_Column -> Aggregate_Column_Builder
make_column internal =
Aggregate_Column_Builder internal.name this.connection internal.sql_type internal.expression this.context

## PRIVATE

Helper that returns the underlying table from before grouping.
ungrouped : Table
ungrouped =
new_ctx = this.context.set_groups []
new_cols = this.internal_columns.filter col->
turned_into_index = this.context.meta_index.exists i-> i.name == col.name
turned_into_index.not
Table this.name this.connection new_cols new_ctx

type Integrity_Error

## UNSTABLE
Expand Down
3 changes: 0 additions & 3 deletions distribution/lib/Standard/Database/0.0.0-dev/src/Main.enso
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,3 @@ export Standard.Database.Connection.Connection
from Standard.Database.Connection.Database export all

import Standard.Table.Data.Table
import Standard.Table.Data.Order_Rule
from Standard.Table.Data.Table export No_Such_Column_Error
from Standard.Table.Data.Order_Rule export Order_Rule
8 changes: 0 additions & 8 deletions distribution/lib/Standard/Examples/0.0.0-dev/src/Main.enso
Original file line number Diff line number Diff line change
Expand Up @@ -278,14 +278,6 @@ transactions_table : Table.Table
transactions_table =
(Enso_Project.data / "food_shop_transactions.csv") . read

## An aggregate table for the relevant examples.
aggregate_table : Table.Aggregate_Table
aggregate_table =
transactions = here.transactions_table
item_names = here.inventory_table.at "item_name"
with_names = transactions.join item_names on="item_id"
with_names.group by="item_name"

## An example regex match.
match : Default_Engine.Match
match =
Expand Down
Loading

0 comments on commit fa0e974

Please sign in to comment.