Skip to content

Commit

Permalink
Support Previous_Value in fill_nothing and fill_empty (#8105)
Browse files Browse the repository at this point in the history
- Adds `Previous_Value` to `fill_nothing` and `fill_empty`, as requested by #7192.
  • Loading branch information
radeusgd authored Oct 20, 2023
1 parent 1391dd9 commit 8172896
Show file tree
Hide file tree
Showing 16 changed files with 300 additions and 47 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,7 @@
deriving column values in the GUI.][8005]
- [Implemented `Table.expand_to_rows` for the in-memory backend.][8029]
- [Added XML support for `.to Table` and `.expand_column`.][8083]
- [Added `Previous_Value` option to `fill_nothing` and `fill_empty`.][8105]

[debug-shortcuts]:
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
Expand Down Expand Up @@ -837,6 +838,7 @@
[8005]: https://github.com/enso-org/enso/pull/8005
[8029]: https://github.com/enso-org/enso/pull/8029
[8083]: https://github.com/enso-org/enso/pull/8083
[8105]: https://github.com/enso-org/enso/pull/8105

#### Enso Compiler

Expand Down
29 changes: 19 additions & 10 deletions distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import Standard.Base.Internal.Rounding_Helpers
from Standard.Base.Widget_Helpers import make_regex_text_widget

import Standard.Table.Data.Column.Column as Materialized_Column
import Standard.Table.Data.Constants.Previous_Value
import Standard.Table.Data.Type.Enso_Types
import Standard.Table.Data.Type.Value_Type_Helpers
import Standard.Table.Internal.Column_Naming_Helper.Column_Naming_Helper
Expand Down Expand Up @@ -1008,20 +1009,25 @@ type Column
Arguments:
- default: The value to replace missing values with. If this argument
is a column, the value from `default` at the corresponding position
will be used.
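will be used. If this argument is `Previous_Value`, the missing values
will be replaced with the previous non-missing value in the column.
Note that the first rows may stay `Nothing` if they do not have a
previous value to use. `Previous_Value` is currently not supported in
the Database backend and results in an `Unsupported_Database_Operation`
error.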

> Example
Fill missing values in a column with the value 20.5.

import Standard.Examples

example_fill_nothing = Examples.decimal_column.fill_nothing 20.5
fill_nothing : Column | Any -> Column
@default (Widget_Helpers.make_fill_default_value_selector include_custom_text=False)
fill_nothing : Column | Previous_Value | Any -> Column
fill_nothing self default =
common_type = Value_Type_Helpers.find_common_type_for_arguments [self, default]
common_type.if_not_error <|
op_result = self.make_binary_op "FILL_NULL" default self.name
adapt_unified_column op_result common_type
if Previous_Value == default then Error.throw (Unsupported_Database_Operation.Error "The Previous_Value argument is currently not supported in the database backend.") else
common_type = Value_Type_Helpers.find_common_type_for_arguments [self, default]
common_type.if_not_error <|
op_result = self.make_binary_op "FILL_NULL" default self.name
adapt_unified_column op_result common_type

## ALIAS fill empty, if_empty
GROUP Standard.Base.Values
Expand All @@ -1032,18 +1038,21 @@ type Column
Arguments:
- default: The value to replace empty values with. If this argument
is a column, the value from `default` at the corresponding position
will be used.
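will be used. If this argument is `Previous_Value`, the empty values
will be replaced with the previous non-empty value in the column. Note
that the first rows may stay empty if they do not have a previous value
to use. `Previous_Value` is currently not supported in the Database
backend and results in an `Unsupported_Database_Operation` error.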

> Example
Fill empty values in a column with the value "hello".

import Standard.Examples

example_fill_empty = Examples.text_column_1.fill_empty "hello"
fill_empty : Column | Any -> Column
@default (Widget_Helpers.make_fill_default_value_selector include_custom_text=True)
fill_empty : Column | Previous_Value | Any -> Column
fill_empty self default =
Value_Type.expect_text self <|
Value_Type.expect_text default <|
if Previous_Value == default then Error.throw (Unsupported_Database_Operation.Error "The Previous_Value argument is currently not supported in the database backend.") else
Value_Type.expect_text self <| Value_Type.expect_text default <|
result = self.is_empty.iif default self
result.rename self.name

Expand Down
22 changes: 14 additions & 8 deletions distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ from Standard.Base.Widget_Helpers import make_delimiter_selector

import Standard.Table.Data.Calculations.Column_Operation.Column_Operation
import Standard.Table.Data.Column_Ref.Column_Ref
import Standard.Table.Data.Constants.Previous_Value
import Standard.Table.Data.Expression.Expression
import Standard.Table.Data.Expression.Expression_Error
import Standard.Table.Data.Join_Condition.Join_Condition
Expand Down Expand Up @@ -2368,16 +2369,19 @@ type Table
match names, or a Vector of these.
- default: The value to replace missing values with. If this argument
is a column, the value from `default` at the corresponding position
will be used.
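will be used. If this argument is `Previous_Value`, the missing values
will be replaced with the previous non-missing value in the column.
Note that the first rows may stay `Nothing` if they do not have a
previous value to use. `Previous_Value` is currently not supported in
the Database backend and results in an `Unsupported_Database_Operation`
error.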

> Example
Fill missing values in two columns with the value 20.5.

fill_nothing = table.fill_nothing ["col0", "col1"] 20.5
@columns Widget_Helpers.make_column_name_vector_selector
@default Widget_Helpers.make_column_ref_by_name_selector
fill_nothing : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Column | Column_Ref | Any -> Table
fill_nothing self (columns : Vector | Text | Integer | Regex) (default : Column | Column_Ref | Any) =
@default (self -> Widget_Helpers.make_fill_default_value_selector column_source=self include_custom_text=False)
fill_nothing : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Column | Column_Ref | Previous_Value | Any -> Table
fill_nothing self (columns : Vector | Text | Integer | Regex) default =
resolved_default = (self:Table_Ref).resolve default
transformer col = col.fill_nothing resolved_default
Table_Helpers.replace_columns_with_transformed_columns self columns transformer
Expand All @@ -2393,16 +2397,18 @@ type Table
match names, or a Vector of these.
- default: The value to replace empty values with. If this argument
is a column, the value from `default` at the corresponding position
will be used.
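will be used. If this argument is `Previous_Value`, the empty values
will be replaced with the previous non-empty value in the column. Note
that the first rows may stay empty if they do not have a previous value
to use. `Previous_Value` is currently not supported in the Database
backend and results in an `Unsupported_Database_Operation` error.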

> Example
Fill empty values in two columns with the value "hello".

fill_empty = table.fill_empty ["col0", "col1"] "hello"
@columns Widget_Helpers.make_column_name_vector_selector
@default Widget_Helpers.make_column_ref_or_text_value_selector
fill_empty : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Column | Column_Ref | Any -> Table
fill_empty self (columns : Vector | Text | Integer | Regex) (default : Column | Column_Ref | Any) =
@default (self -> Widget_Helpers.make_fill_default_value_selector column_source=self include_custom_text=True)
fill_empty : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Column | Column_Ref | Previous_Value | Any -> Table
fill_empty self (columns : Vector | Text | Integer | Regex) default =
resolved_default = (self:Table_Ref).resolve default
transformer col = col.fill_empty resolved_default
Table_Helpers.replace_columns_with_transformed_columns self columns transformer
Expand Down
61 changes: 42 additions & 19 deletions distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import Standard.Base.Internal.Polyglot_Helpers
import Standard.Base.Internal.Rounding_Helpers
from Standard.Base.Widget_Helpers import make_regex_text_widget

import project.Data.Constants.Previous_Value
import project.Data.Data_Formatter.Data_Formatter
import project.Data.Table.Table
import project.Data.Type.Enso_Types
Expand Down Expand Up @@ -1105,28 +1106,33 @@ type Column
Arguments:
- default: The value to replace missing values with. If this argument
is a column, the value from `default` at the corresponding position
will be used.
will be used. If this argument is `Previous_Value`, the missing values
will be replaced with the previous value in the column. Note that the
first rows may stay `Nothing` if they do not have a previous value to
use.

> Example
Fill missing values in a column with the value 20.5.

import Standard.Examples

example_fill_nothing = Examples.decimal_column.fill_nothing 20.5
fill_nothing : Column | Any -> Column
@default (Widget_Helpers.make_fill_default_value_selector include_custom_text=False)
fill_nothing : Column | Previous_Value | Any -> Column
fill_nothing self default =
common_type = Value_Type_Helpers.find_common_type_for_arguments [self, default]
common_type.if_not_error <|
storage = self.java_column.getStorage
storage_type = Storage.from_value_type_strict common_type
new_storage = Java_Problems.with_problem_aggregator Problem_Behavior.Report_Warning java_problem_aggregator->
case default of
Column.Value java_col ->
other_storage = java_col.getStorage
storage.fillMissingFrom other_storage storage_type java_problem_aggregator
_ ->
storage.fillMissing default storage_type java_problem_aggregator
Column.Value (Java_Column.new self.name new_storage)
if Previous_Value == default then fill_previous self Nothing else
common_type = Value_Type_Helpers.find_common_type_for_arguments [self, default]
common_type.if_not_error <|
storage = self.java_column.getStorage
storage_type = Storage.from_value_type_strict common_type
new_storage = Java_Problems.with_problem_aggregator Problem_Behavior.Report_Warning java_problem_aggregator->
case default of
Column.Value java_col ->
other_storage = java_col.getStorage
storage.fillMissingFrom other_storage storage_type java_problem_aggregator
_ ->
storage.fillMissing default storage_type java_problem_aggregator
Column.Value (Java_Column.new self.name new_storage)

## ALIAS fill empty, if_empty
GROUP Standard.Base.Values
Expand All @@ -1137,20 +1143,24 @@ type Column
Arguments:
- default: The value to replace empty values with. If this argument
is a column, the value from `default` at the corresponding position
will be used.
will be used. If this argument is `Previous_Value`, the empty values
will be replaced with the previous value in the column. Note that the
first rows may stay empty if they do not have a previous value to use.

> Example
Fill empty values in a column with the value "hello".

import Standard.Examples

example_fill_empty = Examples.text_column_1.fill_empty "hello"
fill_empty : Column | Any -> Column
@default (Widget_Helpers.make_fill_default_value_selector include_custom_text=True)
fill_empty : Column | Previous_Value | Any -> Column
fill_empty self default =
Value_Type.expect_text self <|
Value_Type.expect_text default <|
result = self.is_empty.iif default self
result.rename self.name
if Previous_Value == default then fill_previous self self.is_empty else
Value_Type.expect_text default <|
result = self.is_empty.iif default self
result.rename self.name

## GROUP Standard.Base.Text
Checks for each element of the column if it starts with `other`.
Expand Down Expand Up @@ -2527,6 +2537,19 @@ naming_helper = Column_Naming_Helper.in_memory
Resolves the default date period for `date_add` depending on the source column value type.
default_date_period column = if column.value_type.has_date then Date_Period.Day else Time_Period.Hour

## PRIVATE
   Replaces every missing entry of a column with the closest preceding
   non-missing entry.

   Arguments:
   - column: The column whose gaps should be filled.
   - is_missing: A boolean column marking which entries count as missing.
     When `Nothing` is passed, the default missing value semantics
     (`is_nothing`) are used instead.
fill_previous column is_missing =
    # `Nothing` is forwarded as-is; otherwise the indicator column's storage is passed.
    missing_indicator = case is_missing of
        Nothing -> Nothing
        _ -> is_missing.java_column.getStorage
    source_storage = column.java_column.getStorage
    filled_storage = source_storage.fillMissingFromPrevious missing_indicator
    Column.from_storage column.name filled_storage

## PRIVATE
Conversion method to a Column from a Vector.
Column.from (that:Vector) (name:Text="Vector") = Column.from_vector name that
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
## Indicates that the operation should use the previous non-missing value
when filling in missing values, for example in `fill_nothing` and
`fill_empty`.
type Previous_Value
22 changes: 14 additions & 8 deletions distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import project.Data.Calculations.Column_Operation.Column_Operation
import project.Data.Column as Column_Module
import project.Data.Column.Column
import project.Data.Column_Ref.Column_Ref
import project.Data.Constants.Previous_Value
import project.Data.Data_Formatter.Data_Formatter
import project.Data.Expression.Expression
import project.Data.Expression.Expression_Error
Expand Down Expand Up @@ -2371,16 +2372,19 @@ type Table
match names, or a Vector of these.
- default: The value to replace missing values with. If this argument
is a column, the value from `default` at the corresponding position
will be used.
will be used. If this argument is `Previous_Value`, the missing values
will be replaced with the previous value in the column. Note that the
first rows may stay `Nothing` if they do not have a previous value to
use.

> Example
Fill missing values in two columns with the value 20.5.

fill_nothing = table.fill_nothing ["col0", "col1"] 20.5
@columns Widget_Helpers.make_column_name_vector_selector
@default Widget_Helpers.make_column_ref_by_name_selector
fill_nothing : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Column | Column_Ref | Any -> Table
fill_nothing self (columns : Vector | Text | Integer | Regex) (default : Column | Column_Ref | Any) =
@default (self -> Widget_Helpers.make_fill_default_value_selector column_source=self include_custom_text=False)
fill_nothing : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Column | Column_Ref | Previous_Value | Any -> Table
fill_nothing self (columns : Vector | Text | Integer | Regex) default =
resolved_default = (self:Table_Ref).resolve default
transformer col = col.fill_nothing resolved_default
Table_Helpers.replace_columns_with_transformed_columns self columns transformer
Expand All @@ -2396,16 +2400,18 @@ type Table
match names, or a Vector of these.
- default: The value to replace empty values with. If this argument
is a column, the value from `default` at the corresponding position
will be used.
will be used. If this argument is `Previous_Value`, the empty values
will be replaced with the previous value in the column. Note that the
first rows may stay empty if they do not have a previous value to use.

> Example
Fill empty values in two columns with the value "hello".

fill_empty = table.fill_empty ["col0", "col1"] "hello"
@columns Widget_Helpers.make_column_name_vector_selector
@default Widget_Helpers.make_column_ref_or_text_value_selector
fill_empty : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Column | Column_Ref | Any -> Table
fill_empty self (columns : Vector | Text | Integer | Regex) (default : Column | Column_Ref | Any) =
@default (self -> Widget_Helpers.make_fill_default_value_selector column_source=self include_custom_text=True)
fill_empty : Vector (Integer | Text | Regex) | Text | Integer | Regex -> Column | Column_Ref | Previous_Value | Any -> Table
fill_empty self (columns : Vector | Text | Integer | Regex) default =
resolved_default = (self:Table_Ref).resolve default
transformer col = col.fill_empty resolved_default
Table_Helpers.replace_columns_with_transformed_columns self columns transformer
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,16 @@ make_column_ref_or_text_value_selector table display=Display.Always =
custom_text_option = Option "'custom text'" "''"
Single_Choice values=(col_names_options+[custom_text_option]) display=display

## PRIVATE
   Builds the single-choice widget offered for the `default` argument of
   `fill_nothing` and `fill_empty`.

   The `Previous_Value` option is always listed first. It is followed by one
   `Column_Ref.Name` option per column of `column_source`, and finally by a
   custom text option when `include_custom_text` is set.

   If `column_source` is Nothing, `Column_Ref` options will not be added.
make_fill_default_value_selector : Table | Nothing -> Boolean -> Display -> Widget
make_fill_default_value_selector column_source=Nothing include_custom_text=False display=Display.Always =
    previous_value_options = [Option 'Previous Value' 'Previous_Value']
    column_ref_options = if column_source.is_nothing then [] else
        column_source.column_names.map (name -> Option name "(Column_Ref.Name "+name.pretty+")")
    text_options = if include_custom_text then [Option "'custom text'" "''"] else []
    all_options = previous_value_options+column_ref_options+text_options
    Single_Choice values=all_options display=display

## PRIVATE
Make a filter condition selector.
make_filter_condition_selector : Table -> Display -> Widget
Expand Down
2 changes: 2 additions & 0 deletions distribution/lib/Standard/Table/0.0.0-dev/src/Main.enso
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import project.Excel.Excel_Range.Excel_Range
import project.Excel.Excel_Section.Excel_Section
import project.Excel.Excel_Workbook.Excel_Workbook
import project.Extensions.Prefix_Name.Prefix_Name
from project.Data.Constants import all
from project.Delimited.Delimited_Format.Delimited_Format import Delimited
from project.Excel.Excel_Format.Excel_Format import Excel
from project.Excel.Excel_Section.Excel_Section import Cell_Range, Range_Names, Sheet_Names, Worksheet
Expand Down Expand Up @@ -51,6 +52,7 @@ export project.Excel.Excel_Range.Excel_Range
export project.Excel.Excel_Section.Excel_Section
export project.Excel.Excel_Workbook.Excel_Workbook
export project.Extensions.Prefix_Name.Prefix_Name
from project.Data.Constants export all
from project.Delimited.Delimited_Format.Delimited_Format export Delimited
from project.Excel.Excel_Format.Excel_Format export Excel
from project.Excel.Excel_Section.Excel_Section export Cell_Range, Range_Names, Sheet_Names, Worksheet
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,39 @@ public Storage<?> fillMissing(Value arg, StorageType commonType, ProblemAggregat
}
}

/**
 * Builds a new storage in which every missing entry takes the value of the closest preceding
 * non-missing entry. Entries that come before the first non-missing entry stay missing.
 *
 * @param missingIndicator must be {@code null}; this storage only supports its own missing-value
 *     bitmap
 * @return a new {@code BoolStorage} of the same size with the gaps filled
 * @throws IllegalStateException if {@code missingIndicator} is not {@code null}
 */
@Override
public Storage<?> fillMissingFromPrevious(BoolStorage missingIndicator) {
  if (missingIndicator != null) {
    throw new IllegalStateException("Custom missing value semantics are not supported by BoolStorage.");
  }

  // Last non-missing value seen so far; only meaningful once `seenValue` is true.
  boolean lastValue = false;
  boolean seenValue = false;
  BitSet stillMissing = new BitSet();
  BitSet filledValues = new BitSet();

  Context context = Context.getCurrent();
  for (int row = 0; row < size; row++) {
    if (!isMissing.get(row)) {
      // A present value is copied over and becomes the new fill value.
      lastValue = getItem(row);
      seenValue = true;
      filledValues.set(row, lastValue);
    } else if (seenValue) {
      filledValues.set(row, lastValue);
    } else {
      // Nothing to carry forward yet, so the row remains missing.
      stillMissing.set(row);
    }

    // Polled once per row; presumably lets the runtime interrupt long loops (confirm).
    context.safepoint();
  }

  return new BoolStorage(filledValues, stillMissing, size, false);
}

@Override
public BoolStorage mask(BitSet mask, int cardinality) {
Context context = Context.getCurrent();
Expand Down
Loading

0 comments on commit 8172896

Please sign in to comment.