-
Notifications
You must be signed in to change notification settings - Fork 323
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implement Table.add_group_number
, with operations Unique
and Equal_Count
#11818
Changes from 12 commits
d16cfe7
c01550c
8bc55e7
eedd70a
842d22a
9796878
df8f115
57b7218
c0add8a
056957a
b1a4df5
25a4c2d
03f757c
c8779c4
d278007
c0ec57d
8524232
07c5eca
0777f3e
8441eb0
7ada84c
55a8b5c
19e4a8a
9d86f51
072093f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
from Standard.Base import all | ||
|
||
polyglot java import org.enso.table.operations.AddGroupNumber | ||
|
||
## Selects how `add_group_number` assigns rows to groups.
type Grouping_Method
    ## Rows are grouped by the values of the columns given in `group_by`.
    Unique

    ## Rows are split into `bucket_count` buckets that each hold the same
       number of rows; only the last bucket may differ.
    Equal_Count (bucket_count : Integer)
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @radeusgd suggested we use the term 'bucket' here, such as There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this whole component should be talking about buckets instead of groups. I think group is too closely associated with aggregating, |
jdunkerley marked this conversation as resolved.
Show resolved
Hide resolved
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
from Standard.Base import all | ||
import Standard.Base.Errors.Common.Unsupported_Argument_Types | ||
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument | ||
|
||
import project.Column.Column | ||
import project.Grouping_Method.Grouping_Method | ||
import project.Internal.Java_Problems | ||
import project.Internal.Problem_Builder.Problem_Builder | ||
import project.Internal.Table_Helpers | ||
import project.Set_Mode.Set_Mode | ||
import project.Table.Table | ||
from project.Internal.Add_Row_Number import rename_columns_if_needed | ||
|
||
polyglot java import java.lang.ArithmeticException | ||
polyglot java import org.enso.table.operations.AddGroupNumber | ||
|
||
## PRIVATE
   Implementation of `Table.add_group_number`: resolves the `group_by` and
   `order_by` selectors against `table`, computes the group-number column and
   returns the table with that column added under `name`.

   Arguments:
   - table: The table that receives the new column.
   - grouping_method: The `Grouping_Method` used to assign rows to groups.
   - name: The name of the new column.
   - from: The starting value for the enumeration.
   - step: The amount to increment the enumeration by.
   - group_by: Selector(s) of the columns to group by.
   - order_by: Selector(s) of the columns to order by.
   - on_problems: How problems gathered during the operation are reported.

   Errors:
   - An `Illegal_Argument` error is raised if a Java `ArithmeticException` or
     an `Unsupported_Argument_Types` panic occurs while numbering.
add_group_number (table:Table) (grouping_method:Grouping_Method=..Unique) (name:Text="Group") (from:Integer=0) (step:Integer=1) (group_by:(Vector | Text | Integer | Regex)=[]) (order_by:(Vector | Text)=[]) (on_problems:Problem_Behavior=..Report_Warning) -> Table =
    # Missing columns are treated as errors by this builder.
    problem_builder = Problem_Builder.new error_on_missing_columns=True
    grouping_columns = table.columns_helper.select_columns_helper group_by Case_Sensitivity.Default True problem_builder
    ordering = Table_Helpers.resolve_order_by table.columns order_by problem_builder

    # Shared handler: both panics caught below surface as the same error.
    # NOTE(review): `Unsupported_Argument_Types` is presumably raised when
    # `from`/`step` do not fit the Java method's integer parameters - confirm.
    handle_arithmetic_exception _ =
        Error.throw (Illegal_Argument.Error "The group number has exceeded the 64-bit integer range. BigInteger numbering is currently not supported. Please use a smaller start/step.")

    problem_builder.attach_problems_before on_problems <| Panic.catch ArithmeticException handler=handle_arithmetic_exception <| Panic.catch Unsupported_Argument_Types handler=handle_arithmetic_exception <|
        new_column = create_column table grouping_method name from step grouping_columns ordering on_problems
        # Run the clash-handling helper on `name` before the new column is
        # added with `Set_Mode.Add`.
        renamed_table = rename_columns_if_needed table name on_problems Table.new
        renamed_table.set new_column name set_mode=Set_Mode.Add
|
||
## PRIVATE
   Builds the group-number `Column` called `name` for `table`.

   The grouping and ordering columns are unwrapped to their Java columns and
   each ordering entry is converted to a sign with `direction.to_sign`. The
   storage is then computed by `create_column_with_method` inside a Java
   problem aggregator configured with `on_problems`.
create_column table grouping_method name from step grouping_columns ordering on_problems =
    group_by_java = grouping_columns.map .java_column
    order_by_java = ordering.map entry-> entry.column.java_column
    order_signs = ordering.map entry-> entry.associated_selector.direction.to_sign
    storage = Java_Problems.with_problem_aggregator on_problems aggregator->
        create_column_with_method table.row_count grouping_method from step group_by_java order_by_java order_signs aggregator
    Column.from_storage name storage
|
||
## PRIVATE
   Validates the `group_by` / `order_by` combination for the chosen
   `grouping_method` and dispatches to the matching `AddGroupNumber` Java
   routine.

   - `Unique` needs at least one grouping column and no ordering columns.
   - `Equal_Count` accepts no grouping columns.

   An `Illegal_Argument` error is raised when these rules are violated.
create_column_with_method row_count grouping_method from step grouping_java_columns ordering_columns directions java_problem_aggregator =
    has_group_by = grouping_java_columns.is_empty.not
    has_order_by = ordering_columns.is_empty.not
    case grouping_method of
        Grouping_Method.Unique ->
            invalid_arguments = has_group_by.not || has_order_by
            if invalid_arguments then Error.throw (Illegal_Argument.Error "add_group_number with ..Unique requires a non-empty 'group_by' and cannot take an 'order_by' parameter") else
                AddGroupNumber.numberGroupsUnique row_count from step grouping_java_columns java_problem_aggregator
        Grouping_Method.Equal_Count bucket_count ->
            if has_group_by then Error.throw (Illegal_Argument.Error "add_group_number with ..Equal_Count cannot take a 'group_by' parameter") else
                AddGroupNumber.numberGroupsEqualCount row_count bucket_count from step ordering_columns directions java_problem_aggregator
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -34,6 +34,8 @@ import project.Delimited.Delimited_Format.Delimited_Format | |
import project.Expression.Expression | ||
import project.Expression.Expression_Error | ||
import project.Extensions.Table_Conversions | ||
import project.Grouping_Method.Grouping_Method | ||
import project.Internal.Add_Group_Number | ||
import project.Internal.Add_Row_Number | ||
import project.Internal.Add_Running | ||
import project.Internal.Aggregate_Column_Helper | ||
|
@@ -2317,6 +2319,109 @@ type Table | |
add_row_number self (name:Text="Row") (from:Integer=0) (step:Integer=1) (group_by:(Vector | Text | Integer | Regex)=[]) (order_by:(Vector | Text)=[]) (on_problems:Problem_Behavior=..Report_Warning) = | ||
Incomparable_Values.handle_errors <| Add_Row_Number.add_row_number self name from step group_by order_by on_problems | ||
|
||
## PRIVATE add group column, group id | ||
GROUP Standard.Base.Values | ||
ICON column_add | ||
Adds a new column to the table enumerating groups of rows, assigning each | ||
row to one group number. All rows in each group will get the same number. | ||
|
||
Arguments: | ||
- grouping_method: Specifies how to group the rows; see "Grouping | ||
Methods", below. | ||
- name: The name of the new column. Defaults to "Group". | ||
- from: The starting value for the enumeration. Defaults to 0. | ||
- step: The amount to increment the enumeration by. Defaults to 1. | ||
- group_by: Specifies the columns to group by, for grouping methods that | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It might make more sense to put this into There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I forgot to send this -- @radeusgd you also suggested it, so I did it. |
||
use specific columns. | ||
- order_by: Specifies the columns to order by, for grouping methods that | ||
are affected by ordering. Defaults to the order of the rows in the | ||
table. | ||
|
||
? Grouping Methods | ||
|
||
The following grouping methods are supported: | ||
- `Unique`: groups rows by the values in the columns specified in | ||
`group_by`. `order_by` is not allowed to be specified. | ||
- `Equal_Count`: groups rows into the specified number of buckets, with | ||
each bucket containing the same number of rows, except possibly the | ||
last one. If `order_by` is specified, then the rows are allocated to | ||
successive groups in the specified order. `group_by` is not allowed | ||
to be specified. | ||
|
||
? Ordering of rows | ||
|
||
Note that the ordering of rows from the original table is preserved in | ||
all cases. The grouping and ordering settings can affect how the group | ||
numbers are assigned, depending on the grouping method. The order of | ||
the rows itself is not changed by this operation. | ||
|
||
! Error Conditions | ||
|
||
- If the `group_by` and `order_by` arguments are not appropriate for | ||
the grouping method, an `Illegal_Argument` error is raised. | ||
- If the columns specified in `group_by` or `order_by` are not present | ||
in the table, a `Missing_Input_Columns` error is raised. | ||
- If the column with the same name as provided `name` already exists, | ||
a `Duplicate_Output_Column_Names` problem is reported and the | ||
existing column is renamed to avoid the clash. | ||
- If grouping on floating point numbers, a `Floating_Point_Equality` | ||
problem is reported. | ||
|
||
> Example | ||
Assign group numbers based on unique values of the first two columns. | ||
|
||
## table: | ||
x | y | z | ||
---+---+--- | ||
1 | 0 | 2 | ||
0 | 1 | 0 | ||
1 | 2 | 0 | ||
0 | 1 | 1 | ||
1 | 0 | 1 | ||
1 | 2 | 1 | ||
table = table_builder [['x', [1, 0, 1, 0, 1, 1]], ['y', [0, 1, 2, 1, 0, 2]], ['z', [2, 0, 0, 1, 1, 1]]] | ||
table2 = table.add_group_number ..Unique "g" group_by=['x', 'y'] | ||
table2.at 'g' . to_vector | ||
# => [0, 1, 2, 1, 0, 2] | ||
## table2: | ||
x | y | z | g | ||
---+---+---+--- | ||
1 | 0 | 2 | 0 | ||
0 | 1 | 0 | 1 | ||
1 | 2 | 0 | 2 | ||
0 | 1 | 1 | 1 | ||
1 | 0 | 1 | 0 | ||
1 | 2 | 1 | 2 | ||
|
||
> Example | ||
Divide rows into three groups. | ||
## table: | ||
x | y | ||
---+--- | ||
1 | 5 | ||
2 | 4 | ||
3 | 3 | ||
4 | 2 | ||
5 | 1 | ||
table = table_builder [['x', [1, 2, 3, 4, 5]], ['y', [5, 4, 3, 2, 1]]] | ||
table2 = table.add_group_number (..Equal_Count 3) "g" | ||
table2.at 'g' . to_vector | ||
# => [0, 0, 1, 1, 2] | ||
## table2: | ||
x | y | g | ||
---+---+--- | ||
1 | 5 | 0 | ||
2 | 4 | 0 | ||
3 | 3 | 1 | ||
4 | 2 | 1 | ||
5 | 1 | 2 | ||
@name (Widget.Text_Input display=..Always)
@from (Widget.Numeric_Input display=..Always)
@group_by (Widget_Helpers.make_column_name_multi_selector display=..When_Modified)
@order_by (Widget_Helpers.make_order_by_selector display=..When_Modified)
add_group_number self (grouping_method:Grouping_Method=..Unique) (name:Text="Group") (from:Integer=0) (step:Integer=1) (group_by:(Vector | Text | Integer | Regex)=[]) (order_by:(Vector | Text)=[]) (on_problems:Problem_Behavior=..Report_Warning) -> Table =
    # All the work is delegated to the internal module; the call is wrapped in
    # `Incomparable_Values.handle_errors`, as `add_row_number` does.
    Incomparable_Values.handle_errors <|
        Add_Group_Number.add_group_number self grouping_method name from step group_by order_by on_problems
|
||
## ALIAS add column, expression, formula, new column, update column | ||
GROUP Standard.Base.Values | ||
ICON column_add | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.