diff --git a/CHANGELOG.md b/CHANGELOG.md index 82a332762731..ce5e0daf7081 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -445,6 +445,7 @@ - [Added `at_least_one` flag to `Table.tokenize_to_rows`.][6539] - [Moved `Redshift` connector into a separate `AWS` library.][6550] - [Added `Date_Range`.][6621] +- [Implemented the `cast` operation for `Table` and `Column`.][6711] [debug-shortcuts]: https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug @@ -655,6 +656,7 @@ [6539]: https://github.com/enso-org/enso/pull/6539 [6550]: https://github.com/enso-org/enso/pull/6550 [6621]: https://github.com/enso-org/enso/pull/6621 +[6711]: https://github.com/enso-org/enso/pull/6711 #### Enso Compiler diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso index d1d8563df84c..4f354a115ebb 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso @@ -9,7 +9,8 @@ import Standard.Table.Internal.Java_Problems import Standard.Table.Internal.Problem_Builder.Problem_Builder import Standard.Table.Internal.Widget_Helpers from Standard.Table import Sort_Column, Data_Formatter, Value_Type, Auto -from Standard.Table.Errors import Floating_Point_Equality, Inexact_Type_Coercion, Invalid_Value_Type, Lossy_Conversion +from Standard.Table.Errors import Floating_Point_Equality, Inexact_Type_Coercion, Invalid_Value_Type, Conversion_Failure +from Standard.Table.Internal.Cast_Helpers import check_cast_compatibility import project.Connection.Connection.Connection import project.Data.SQL_Statement.SQL_Statement @@ -1002,18 +1003,13 @@ type Column _ = [format, locale] Error.throw <| Unsupported_Database_Operation.Error "`Column.format` is not implemented yet for the Database backends." - ## PRIVATE - UNSTABLE - Cast the column to a specific type. + ## Cast the column to a specific type. 
Arguments: - value_type: The `Value_Type` to cast the column to. - on_problems: Specifies how to handle problems if they occur, reporting them as warnings by default. - TODO [RW] this is a prototype needed for debugging, proper implementation - and testing will come with #6112. - In the Database backend, this will boil down to a CAST operation. In the in-memory backend, a conversion will be performed according to the following rules: @@ -1024,6 +1020,9 @@ type Column length. - Conversion between numeric types will replace values exceeding the range of the target type with `Nothing`. + - Converting decimal numbers into integers will truncate or round them, + depending on the backend. If more control is needed, use the various + rounding functions (such as `round` or `floor`). - Booleans may also be converted to numbers, with `True` being converted to `1` and `False` to `0`. The reverse is not supported - use `iif` instead. @@ -1032,32 +1031,27 @@ type Column - If a `Date` is to be converted to `Date_Time`, it will be set at midnight of the default system timezone. - ? Conversion Precision - - In the in-memory backend, if the conversion is lossy, a - `Lossy_Conversion` warning will be reported. The only exception is when - truncating a column which is already a text column - as then the - truncation seems like an intended behaviour, so it is not reported. If - truncating needs to occur when converting a non-text column, a warning - will still be reported. - - Currently, the warning is not reported for Database backends. + If the target type cannot fit some of the values (for example due to too + small range), a `Conversion_Failure` may be reported according to the + `on_problems` rules. The Database backends may fail with `SQL_Error` + instead. ? Inexact Target Type If the backend does not support the requested target type, the closest supported type is chosen and a `Inexact_Type_Coercion` problem is reported. - cast : Value_Type -> Problem_Behavior -> Column ! 
Illegal_Argument | Inexact_Type_Coercion | Lossy_Conversion - cast self value_type=self.value_type on_problems=Problem_Behavior.Report_Warning = - dialect = self.connection.dialect - type_mapping = dialect.get_type_mapping - target_sql_type = type_mapping.value_type_to_sql value_type on_problems - target_sql_type.if_not_error <| - infer_from_database new_expression = - SQL_Type_Reference.new self.connection self.context new_expression - new_column = dialect.make_cast self.as_internal target_sql_type infer_from_database - Column.Value new_column.name self.connection new_column.sql_type_reference new_column.expression self.context + cast : Value_Type -> Problem_Behavior -> Column ! Illegal_Argument | Inexact_Type_Coercion | Conversion_Failure + cast self value_type on_problems=Problem_Behavior.Report_Warning = + check_cast_compatibility self.value_type value_type <| + dialect = self.connection.dialect + type_mapping = dialect.get_type_mapping + target_sql_type = type_mapping.value_type_to_sql value_type on_problems + target_sql_type.if_not_error <| + infer_from_database new_expression = + SQL_Type_Reference.new self.connection self.context new_expression + new_column = dialect.make_cast self.as_internal target_sql_type infer_from_database + Column.Value new_column.name self.connection new_column.sql_type_reference new_column.expression self.context ## ALIAS Transform Column diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso index ddbe8fb305dc..bc1cd9702523 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso @@ -1526,9 +1526,7 @@ type Table _ = [column, pattern, case_sensitivity, parse_values, on_problems] Error.throw (Unsupported_Database_Operation.Error "Table.parse_to_columns is not implemented yet for the Database backends.") - ## PRIVATE - UNSTABLE - Cast the selected columns 
to a specific type. + ## Cast the selected columns to a specific type. Returns a new table in which the selected columns are replaced with columns having the new types. @@ -1539,9 +1537,6 @@ type Table - on_problems: Specifies how to handle problems if they occur, reporting them as warnings by default. - TODO [RW] this is a prototype needed for debugging, proper implementation - and testing will come with #6112. - In the Database backend, this will boil down to a CAST operation. In the in-memory backend, a conversion will be performed according to the following rules: @@ -1552,6 +1547,9 @@ type Table length. - Conversion between numeric types will replace values exceeding the range of the target type with `Nothing`. + - Converting decimal numbers into integers will truncate or round them, + depending on the backend. If more control is needed, use the various + rounding functions (such as `round` or `floor`). - Booleans may also be converted to numbers, with `True` being converted to `1` and `False` to `0`. The reverse is not supported - use `iif` instead. @@ -1560,16 +1558,10 @@ type Table - If a `Date` is to be converted to `Date_Time`, it will be set at midnight of the default system timezone. - ? Conversion Precision - - In the in-memory backend, if the conversion is lossy, a - `Lossy_Conversion` warning will be reported. The only exception is when - truncating a column which is already a text column - as then the - truncation seems like an intended behaviour, so it is not reported. If - truncating needs to occur when converting a non-text column, a warning - will still be reported. - - Currently, the warning is not reported for Database backends. + If the target type cannot fit some of the values (for example due to too + small range), a `Conversion_Failure` may be reported according to the + `on_problems` rules. The Database backends may fail with `SQL_Error` + instead. ? 
Inexact Target Type @@ -1577,10 +1569,10 @@ type Table supported type is chosen and a `Inexact_Type_Coercion` problem is reported. @columns Widget_Helpers.make_column_name_vector_selector - cast : (Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector)) -> Value_Type -> Problem_Behavior -> Table ! Illegal_Argument | Inexact_Type_Coercion | Lossy_Conversion - cast self columns=[0] value_type=Value_Type.Char on_problems=Problem_Behavior.Report_Warning = - selected = self.select_columns columns - selected.columns.fold self table-> column_to_cast-> + cast : (Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector)) -> Value_Type -> Boolean -> Problem_Behavior -> Table ! Illegal_Argument | Inexact_Type_Coercion | Conversion_Failure + cast self columns=[0] value_type error_on_missing_columns=True on_problems=Problem_Behavior.Report_Warning = + selected = self.columns_helper.resolve_columns columns error_on_missing_columns=error_on_missing_columns on_problems=on_problems + selected.fold self table-> column_to_cast-> new_column = column_to_cast.cast value_type on_problems table.set new_column new_name=column_to_cast.name set_mode=Set_Mode.Update diff --git a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso index f3a0d0ffe72e..703e58b21eee 100644 --- a/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso +++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Internal/SQLite/SQLite_Dialect.enso @@ -145,12 +145,19 @@ type SQLite_Dialect make_cast : Internal_Column -> SQL_Type -> (SQL_Expression -> SQL_Type_Reference) -> Internal_Column make_cast self column target_type _ = mapping = self.get_type_mapping - sql_type_text = mapping.sql_type_to_text target_type - new_expression = SQL_Expression.Operation "CAST" [column.expression, SQL_Expression.Literal sql_type_text] - # We override 
the type here, because SQLite gets it wrong if the column starts with NULL values. + target_value_type = mapping.sql_type_to_value_type target_type + custom_cast = make_custom_cast column target_value_type mapping + new_expression = custom_cast.if_nothing <| + self.make_cast_expression column target_type new_sql_type_reference = SQL_Type_Reference.from_constant target_type Internal_Column.Value column.name new_sql_type_reference new_expression + ## PRIVATE + make_cast_expression self column target_type = + mapping = self.get_type_mapping + sql_type_text = mapping.sql_type_to_text target_type + SQL_Expression.Operation "CAST" [column.expression, SQL_Expression.Literal sql_type_text] + ## PRIVATE needs_execute_query_for_type_inference : Boolean needs_execute_query_for_type_inference self = True @@ -164,12 +171,15 @@ type SQLite_Dialect So after unifying columns with mixed types, we add a cast to ensure that. adapt_unified_column : Internal_Column -> Value_Type -> (SQL_Expression -> SQL_Type_Reference) -> Internal_Column adapt_unified_column self column approximate_result_type infer_result_type_from_database_callback = + _ = infer_result_type_from_database_callback # TODO [RW] This may be revisited with #6281. 
case approximate_result_type of Nothing -> column _ -> sql_type = self.get_type_mapping.value_type_to_sql approximate_result_type Problem_Behavior.Ignore - self.make_cast column sql_type infer_result_type_from_database_callback + new_expression = self.make_cast_expression column sql_type + new_sql_type_reference = SQL_Type_Reference.from_constant sql_type + Internal_Column.Value column.name new_sql_type_reference new_expression ## PRIVATE prepare_fetch_types_query : SQL_Expression -> Context -> SQL_Statement @@ -353,3 +363,11 @@ decimal_div = Base_Generator.lift_binary_op "/" x-> y-> ## PRIVATE mod_op = Base_Generator.lift_binary_op "mod" x-> y-> x ++ " - FLOOR(CAST(" ++ x ++ " AS REAL) / CAST(" ++ y ++ " AS REAL)) * " ++ y + +## PRIVATE + It will return `Nothing` if the type does not require custom logic. +make_custom_cast column target_value_type type_mapping = + if target_value_type.is_text then + column_type = type_mapping.sql_type_to_value_type column.sql_type_reference.get + if column_type == Value_Type.Boolean then + SQL_Expression.Operation "IIF" [column.expression, SQL_Expression.Literal "'true'", SQL_Expression.Literal "'false'"] diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso index 483782c184e8..2f949c918d18 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso @@ -13,6 +13,7 @@ import project.Data.Type.Enso_Types import project.Data.Type.Storage import project.Data.Type.Value_Type_Helpers import project.Data.Table.Table +import project.Internal.Cast_Helpers import project.Internal.Java_Problems import project.Internal.Naming_Helpers.Naming_Helpers import project.Internal.Parse_Values_Helper @@ -21,7 +22,7 @@ import project.Data.Type.Value_Type_Helpers from project.Data.Table import print_table from project.Data.Type.Value_Type import Value_Type, Auto -from project.Errors 
import No_Index_Set_Error, Floating_Point_Equality, Invalid_Value_Type, Inexact_Type_Coercion +from project.Errors import No_Index_Set_Error, Floating_Point_Equality, Invalid_Value_Type, Inexact_Type_Coercion, Conversion_Failure from project.Internal.Java_Exports import make_string_builder polyglot java import org.enso.table.data.column.operation.map.MapOperationProblemBuilder @@ -1279,6 +1280,54 @@ type Column _ -> Error.throw <| Illegal_Argument.Error <| "Unsupported format type: " + format.to_text new_column + ## Cast the column to a specific type. + + Arguments: + - value_type: The `Value_Type` to cast the column to. + - on_problems: Specifies how to handle problems if they occur, reporting + them as warnings by default. + + In the Database backend, this will boil down to a CAST operation. + In the in-memory backend, a conversion will be performed according to + the following rules: + - Anything can be cast into the `Mixed` type. + - Converting to a `Char` type, the elements of the column will be + converted to text. If it is fixed length, the texts will be trimmed or + padded on the right with the space character to match the desired + length. + - Conversion between numeric types will replace values exceeding the + range of the target type with `Nothing`. + - Converting decimal numbers into integers will truncate or round them, + depending on the backend. If more control is needed, use the various + rounding functions (such as `round` or `floor`). + - Booleans may also be converted to numbers, with `True` being converted + to `1` and `False` to `0`. The reverse is not supported - use `iif` + instead. + - A `Date_Time` may be converted into a `Date` or `Time` type - the + resulting value will be truncated to the desired type. + - If a `Date` is to be converted to `Date_Time`, it will be set at + midnight of the default system timezone. 
+ + If the target type cannot fit some of the values (for example due to too + small range), a `Conversion_Failure` may be reported according to the + `on_problems` rules. The Database backends may fail with `SQL_Error` + instead. + + ? Inexact Target Type + + If the backend does not support the requested target type, the closest + supported type is chosen and an `Inexact_Type_Coercion` problem is + reported. + cast : Value_Type -> Problem_Behavior -> Column ! Illegal_Argument | Inexact_Type_Coercion | Conversion_Failure + cast self value_type on_problems=Problem_Behavior.Report_Warning = + Cast_Helpers.check_cast_compatibility self.value_type value_type <| + target_storage_type = Storage.from_value_type value_type on_problems + cast_problem_builder = Cast_Helpers.new_java_problem_builder self.name value_type + new_storage = self.java_column.getStorage.cast target_storage_type cast_problem_builder.to_java + problems = cast_problem_builder.get_problems + on_problems.attach_problems_before problems <| + Column.from_storage self.name new_storage + ## ALIAS Transform Column Applies `function` to each item in this column and returns the column diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso index cc3b3e700e6c..02a074800c6c 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso @@ -873,9 +873,7 @@ type Table parse_problem_builder.attach_problems_before on_problems <| Table.new new_columns - ## PRIVATE - UNSTABLE - Cast the selected columns to a specific type. + ## Cast the selected columns to a specific type. Returns a new table in which the selected columns are replaced with columns having the new types. @@ -886,9 +884,6 @@ type Table - on_problems: Specifies how to handle problems if they occur, reporting them as warnings by default. 
- TODO [RW] this is a prototype needed for debugging, proper implementation - and testing will come with #6112. - In the Database backend, this will boil down to a CAST operation. In the in-memory backend, a conversion will be performed according to the following rules: @@ -899,6 +894,9 @@ type Table length. - Conversion between numeric types will replace values exceeding the range of the target type with `Nothing`. + - Converting decimal numbers into integers will truncate or round them, + depending on the backend. If more control is needed, use the various + rounding functions (such as `round` or `floor`). - Booleans may also be converted to numbers, with `True` being converted to `1` and `False` to `0`. The reverse is not supported - use `iif` instead. @@ -907,16 +905,10 @@ type Table - If a `Date` is to be converted to `Date_Time`, it will be set at midnight of the default system timezone. - ? Conversion Precision - - In the in-memory backend, if the conversion is lossy, a - `Lossy_Conversion` warning will be reported. The only exception is when - truncating a column which is already a text column - as then the - truncation seems like an intended behaviour, so it is not reported. If - truncating needs to occur when converting a non-text column, a warning - will still be reported. - - Currently, the warning is not reported for Database backends. + If the target type cannot fit some of the values (for example due to too + small range), a `Conversion_Failure` may be reported according to the + `on_problems` rules. The Database backends may fail with `SQL_Error` + instead. ? Inexact Target Type @@ -924,11 +916,12 @@ type Table supported type is chosen and a `Inexact_Type_Coercion` problem is reported. @columns Widget_Helpers.make_column_name_vector_selector - cast : (Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector)) -> Value_Type -> Problem_Behavior -> Table ! 
Illegal_Argument | Inexact_Type_Coercion | Lossy_Conversion - cast self columns=[0] value_type=Value_Type.Char on_problems=Problem_Behavior.Report_Warning = - _ = [columns, value_type, on_problems] - ## TODO [RW] actual implementation in #6112 - self + cast : (Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector)) -> Value_Type -> Boolean -> Problem_Behavior -> Table ! Illegal_Argument | Inexact_Type_Coercion | Conversion_Failure + cast self columns=[0] value_type error_on_missing_columns=True on_problems=Problem_Behavior.Report_Warning = + selected = self.columns_helper.resolve_columns columns error_on_missing_columns=error_on_missing_columns on_problems=on_problems + selected.fold self table-> column_to_cast-> + new_column = column_to_cast.cast value_type on_problems + table.set new_column new_name=column_to_cast.name set_mode=Set_Mode.Update ## Splits a column of text into a set of new columns. The original column will be removed from the table. diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Type/Value_Type.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Type/Value_Type.enso index e4cb387f794f..549c079aca46 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Type/Value_Type.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Type/Value_Type.enso @@ -161,12 +161,13 @@ type Value_Type _ -> False ## UNSTABLE - Checks if the `Value_Type` represents any numeric type - integer, + Checks if the `Value_Type` represents any numeric type - integer, byte, floating point or decimal. 
is_numeric : Boolean is_numeric self = case self of - Value_Type.Integer _ -> True - Value_Type.Float _ -> True + Value_Type.Integer _ -> True + Value_Type.Float _ -> True + Value_Type.Byte -> True Value_Type.Decimal _ _ -> True _ -> False diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso index 483cd271bb39..0400bd7ff319 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Errors.enso @@ -552,11 +552,23 @@ type Inexact_Type_Coercion to_text self = "Inexact_Type_Coercion.Warning (requested_type = " + self.requested_type.to_text + ") (actual_type = " + self.actual_type.to_text + ")" -## TODO figure out this error in #6112 -type Lossy_Conversion - ## Indicates that some likely not-insignificant information was lost during - a conversion. - Error +type Conversion_Failure + ## Indicates that some values from the column could not be converted to the + desired type. + + This may occur for example when a number does not fit the range of the + target type. + Error (target_type : Value_Type) (related_column : Text) (affected_rows_count : Nothing|Integer) + + ## PRIVATE + + Create a human-readable version of the error. + to_display_text : Text + to_display_text self = + rows_info = case self.affected_rows_count of + Nothing -> "Some values" + count -> count.to_text+" rows" + rows_info + " could not be converted into the target type "+self.target_type.to_display_text+" when converting the column ["+self.related_column+"]." 
type Invalid_Value_For_Type ## PRIVATE diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Cast_Helpers.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Cast_Helpers.enso new file mode 100644 index 000000000000..88e96a527b66 --- /dev/null +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Cast_Helpers.enso @@ -0,0 +1,58 @@ +from Standard.Base import all +import Standard.Base.Errors.Illegal_Argument.Illegal_Argument + +import project.Data.Type.Value_Type.Value_Type +import project.Internal.Parse_Values_Helper +from project.Errors import Conversion_Failure + +polyglot java import org.enso.table.data.column.operation.CastProblemBuilder + +## PRIVATE + Checks if one type can be cast into another and returns a dataflow error + explaining the situation if not. +check_cast_compatibility source_type target_type ~action = + are_compatible = if (target_type == Value_Type.Mixed) || target_type.is_text || (source_type == target_type) then True else + if source_type.is_text && is_a_valid_parse_target target_type then Error.throw (Illegal_Argument.Error "To parse a text column into "+target_type.to_display_text+" type, `parse` should be used instead of `cast`.") else + if source_type == Value_Type.Boolean then target_type.is_numeric else + if source_type.is_numeric then target_type.is_numeric else + case source_type of + Value_Type.Date_Time _ -> + (target_type == Value_Type.Date) || (target_type == Value_Type.Time) + Value_Type.Date -> target_type.has_date + Value_Type.Binary _ _ -> case target_type of + Value_Type.Binary _ _ -> True + _ -> False + _ -> False + if are_compatible then action else + Error.throw (Illegal_Argument.Error "Cannot cast "+source_type.to_display_text+" type into "+target_type.to_display_text+" type.") + +## PRIVATE + Checks if the type is a valid argument for `parse`. 
+is_a_valid_parse_target target_type = + case Meta.meta target_type of + atom : Meta.Atom -> + Parse_Values_Helper.valid_parse_targets.contains atom.constructor.name + _ -> False + +## PRIVATE +type Cast_Problem_Builder + ## PRIVATE + Value column_name target_type to_java + + ## PRIVATE + Returns a vector of all reported problems. + get_problems : Vector + get_problems self = + builder = Vector.new_builder + java_instance = self.to_java + + lossy_conversion_rows = java_instance.getLossyConversionRowCount + if lossy_conversion_rows > 0 then + builder.append (Conversion_Failure.Error self.target_type self.column_name lossy_conversion_rows) + + builder.to_vector + +## PRIVATE +new_java_problem_builder : Text -> Value_Type -> Cast_Problem_Builder +new_java_problem_builder column_name target_type = + Cast_Problem_Builder.Value column_name target_type CastProblemBuilder.new diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Parse_Values_Helper.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Parse_Values_Helper.enso index 0464e15e49e3..8b247b76ac97 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Parse_Values_Helper.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Parse_Values_Helper.enso @@ -13,3 +13,7 @@ translate_parsing_problem expected_value_type problem = case problem of Invalid_Format.Error java_problem.column expected_value_type (Vector.from_polyglot_array java_problem.cells) _ -> Panic.throw (Illegal_State.Error "Reported an unknown problem type: "+problem.to_text) + +## PRIVATE +valid_parse_targets = + ['Integer', 'Float', 'Date', 'Date_Time', 'Time', 'Boolean'] diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Table_Helpers.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Table_Helpers.enso index 3c1a213c41ab..b8f4330903a9 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Table_Helpers.enso +++ 
b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Table_Helpers.enso @@ -64,6 +64,17 @@ type Table_Column_Helper problem_builder.attach_problems_before on_problems <| if result.is_empty then Error.throw No_Output_Columns else result + ## PRIVATE + Works like `select_columns` but will not throw `No_Output_Columns` error + and will return proper columns instead of internal columns. + Useful when selecting a subset of columns to transform. + resolve_columns : Text | Integer | Column_Selector | Vector (Integer | Text | Column_Selector) -> Boolean -> Problem_Behavior -> Boolean -> Vector + resolve_columns self selectors error_on_missing_columns on_problems reorder=False = + problem_builder = Problem_Builder.new error_on_missing_columns=error_on_missing_columns + result = self.select_columns_helper selectors reorder problem_builder + problem_builder.attach_problems_before on_problems <| + result.map self.make_column + ## PRIVATE A helper function encapsulating shared code for `remove_columns` implementations of various Table variants. See the documentation for the diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Widget_Helpers.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Widget_Helpers.enso index b1bfae4ab6ec..cd5392243507 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Widget_Helpers.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Internal/Widget_Helpers.enso @@ -5,6 +5,7 @@ import Standard.Base.Metadata.Display import project.Data.Table.Table import project.Data.Aggregate_Column.Aggregate_Column +import project.Internal.Parse_Values_Helper ## PRIVATE Make an aggregate column selector. @@ -72,8 +73,8 @@ make_order_by_selector table display=Display.Always = Selector for type argument on `Column.parse`. 
parse_type_selector : Single_Choice parse_type_selector = - choice = ['Auto', 'Value_Type.Integer', 'Value_Type.Float', 'Value_Type.Date', 'Value_Type.Date_Time', 'Value_Type.Time', 'Value_Type.Boolean'] - names = ['Auto', 'Integer', 'Float', 'Date', 'Date_Time', 'Time', 'Boolean'] + valid_parse_targets = Parse_Values_Helper.valid_parse_targets + choice = ['Auto'] + (valid_parse_targets.map t-> 'Value_Type.'+t) + names = ['Auto'] + valid_parse_targets options = names.zip choice . map pair-> Option pair.first pair.second Single_Choice display=Display.Always values=options - diff --git a/distribution/lib/Standard/Test/0.0.0-dev/src/Problems.enso b/distribution/lib/Standard/Test/0.0.0-dev/src/Problems.enso index 2af7af331dbc..c9cf3ab8053a 100644 --- a/distribution/lib/Standard/Test/0.0.0-dev/src/Problems.enso +++ b/distribution/lib/Standard/Test/0.0.0-dev/src/Problems.enso @@ -90,7 +90,7 @@ expect_warning expected_warning result = warnings = get_attached_warnings result found = warnings.find if_missing=Nothing x-> (x == expected_warning) || (x.is_a expected_warning) - if found.is_nothing then + found.if_nothing <| loc = Meta.get_source_location 2 Test.fail "Expected the result to contain a warning: "+expected_warning.to_text+", but it did not. The warnings were "+warnings.short_display_text+' (at '+loc+').' 
diff --git a/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/EnsoDate.java b/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/EnsoDate.java index 81c2aefb7916..50da38a6e2f3 100644 --- a/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/EnsoDate.java +++ b/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/EnsoDate.java @@ -14,6 +14,7 @@ import org.enso.interpreter.runtime.EnsoContext; import org.enso.interpreter.runtime.data.text.Text; import org.enso.interpreter.runtime.library.dispatch.TypesLibrary; +import org.enso.polyglot.common_utils.Core_Date_Utils; import java.time.DateTimeException; import java.time.LocalDate; @@ -121,6 +122,9 @@ Type getType(@CachedLibrary("this") TypesLibrary thisLib) { @CompilerDirectives.TruffleBoundary @ExportMessage public Object toDisplayString(boolean allowSideEffects) { - return DateTimeFormatter.ISO_LOCAL_DATE.format(date); + return DATE_FORMATTER.format(date); } + + private static final DateTimeFormatter DATE_FORMATTER = + Core_Date_Utils.defaultLocalDateFormatter(); } diff --git a/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/EnsoTimeOfDay.java b/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/EnsoTimeOfDay.java index 0ef6954a3379..170d5007b36d 100644 --- a/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/EnsoTimeOfDay.java +++ b/engine/runtime/src/main/java/org/enso/interpreter/runtime/data/EnsoTimeOfDay.java @@ -17,6 +17,7 @@ import org.enso.interpreter.runtime.EnsoContext; import org.enso.interpreter.runtime.data.text.Text; import org.enso.interpreter.runtime.library.dispatch.TypesLibrary; +import org.enso.polyglot.common_utils.Core_Date_Utils; @ExportLibrary(InteropLibrary.class) @ExportLibrary(TypesLibrary.class) @@ -122,7 +123,7 @@ public EnsoDateTime toTime(EnsoDate date, EnsoTimeZone zone) { @Builtin.Method(description = "Return this datetime to the datetime in the provided time zone.") @CompilerDirectives.TruffleBoundary 
public Text toText() { - return Text.create(DateTimeFormatter.ISO_LOCAL_TIME.format(localTime)); + return Text.create(TIME_FORMATTER.format(localTime)); } @ExportMessage @@ -168,6 +169,9 @@ Type getType(@CachedLibrary("this") TypesLibrary thisLib) { @CompilerDirectives.TruffleBoundary @ExportMessage public Object toDisplayString(boolean allowSideEffects) { - return DateTimeFormatter.ISO_LOCAL_TIME.format(localTime); + return TIME_FORMATTER.format(localTime); } + + private static final DateTimeFormatter TIME_FORMATTER = + Core_Date_Utils.defaultLocalTimeFormatter(); } diff --git a/lib/scala/common-polyglot-core-utils/src/main/java/org/enso/polyglot/common_utils/Core_Date_Utils.java b/lib/scala/common-polyglot-core-utils/src/main/java/org/enso/polyglot/common_utils/Core_Date_Utils.java index dae007747a58..b6c3ddd9fab0 100644 --- a/lib/scala/common-polyglot-core-utils/src/main/java/org/enso/polyglot/common_utils/Core_Date_Utils.java +++ b/lib/scala/common-polyglot-core-utils/src/main/java/org/enso/polyglot/common_utils/Core_Date_Utils.java @@ -33,6 +33,16 @@ public static DateTimeFormatter defaultZonedDateTimeFormatter() { .toFormatter(); } + /** @return default Date formatter for parsing a Date. */ + public static DateTimeFormatter defaultLocalDateFormatter() { + return DateTimeFormatter.ISO_LOCAL_DATE; + } + + /** @return default Time formatter for parsing a Time_Of_Day. */ + public static DateTimeFormatter defaultLocalTimeFormatter() { + return DateTimeFormatter.ISO_LOCAL_TIME; + } + /** * Parse a date time string into a ZonedDateTime. 
* diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/Builder.java b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/Builder.java index 51054ca7b6b4..61d83c49df3b 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/Builder.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/object/Builder.java @@ -10,26 +10,26 @@ public abstract class Builder { public static Builder getForType(StorageType type, int size) { Builder builder = switch (type) { - case AnyObjectType() -> new ObjectBuilder(size); - case BooleanType() -> new BoolBuilder(size); - case DateType() -> new DateBuilder(size); - case DateTimeType() -> new DateTimeBuilder(size); - case TimeOfDayType() -> new TimeOfDayBuilder(size); - case FloatType(Bits bits) -> - switch (bits) { + case AnyObjectType x -> new ObjectBuilder(size); + case BooleanType x -> new BoolBuilder(size); + case DateType x -> new DateBuilder(size); + case DateTimeType x -> new DateTimeBuilder(size); + case TimeOfDayType x -> new TimeOfDayBuilder(size); + case FloatType floatType -> + switch (floatType.bits()) { case BITS_64 -> NumericBuilder.createDoubleBuilder(size); default -> throw new IllegalArgumentException("Only 64-bit floats are currently supported."); }; - case IntegerType(Bits bits) -> - switch (bits) { + case IntegerType integerType -> + switch (integerType.bits()) { case BITS_64 -> NumericBuilder.createLongBuilder(size); default -> throw new IllegalArgumentException("TODO: Builders other than 64-bit int are not yet supported."); }; - case TextType(long maxLength, boolean isFixed) -> { - if (isFixed) { + case TextType textType -> { + if (textType.fixedLength()) { throw new IllegalArgumentException("Fixed-length text builders are not yet supported yet."); } - if (maxLength >= 0) { + if (textType.maxLength() >= 0) { throw new IllegalArgumentException("Text builders with a maximum length are not yet supported yet."); } diff 
--git a/std-bits/table/src/main/java/org/enso/table/data/column/operation/CastProblemBuilder.java b/std-bits/table/src/main/java/org/enso/table/data/column/operation/CastProblemBuilder.java new file mode 100644 index 000000000000..018931bde162 --- /dev/null +++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/CastProblemBuilder.java @@ -0,0 +1,13 @@ +package org.enso.table.data.column.operation; + +public class CastProblemBuilder { + private int lossyConversionRowCount = 0; + + public void reportLossyConversion() { + lossyConversionRowCount++; + } + + public int getLossyConversionRowCount() { + return lossyConversionRowCount; + } +} diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/BoolStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/BoolStorage.java index 1e5410bab4c0..11bb098c12ee 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/BoolStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/BoolStorage.java @@ -3,18 +3,18 @@ import java.util.BitSet; import java.util.List; import java.util.function.IntFunction; - import org.enso.base.polyglot.Polyglot_Utils; import org.enso.table.data.column.builder.object.BoolBuilder; import org.enso.table.data.column.builder.object.Builder; -import org.enso.table.data.column.builder.object.InferredBuilder; +import org.enso.table.data.column.builder.object.NumericBuilder; +import org.enso.table.data.column.builder.object.StringBuilder; +import org.enso.table.data.column.operation.CastProblemBuilder; import org.enso.table.data.column.operation.map.MapOpStorage; import org.enso.table.data.column.operation.map.MapOperation; import org.enso.table.data.column.operation.map.MapOperationProblemBuilder; import org.enso.table.data.column.operation.map.UnaryMapOperation; import org.enso.table.data.column.operation.map.bool.BooleanIsInOp; -import org.enso.table.data.column.storage.type.BooleanType; -import 
org.enso.table.data.column.storage.type.StorageType; +import org.enso.table.data.column.storage.type.*; import org.enso.table.data.index.Index; import org.enso.table.data.mask.OrderMask; import org.enso.table.data.mask.SliceRange; @@ -81,12 +81,14 @@ public boolean isOpVectorized(String name) { } @Override - protected Storage runVectorizedMap(String name, Object argument, MapOperationProblemBuilder problemBuilder) { + protected Storage runVectorizedMap( + String name, Object argument, MapOperationProblemBuilder problemBuilder) { return ops.runMap(name, this, argument, problemBuilder); } @Override - protected Storage runVectorizedZip(String name, Storage argument, MapOperationProblemBuilder problemBuilder) { + protected Storage runVectorizedZip( + String name, Storage argument, MapOperationProblemBuilder problemBuilder) { return ops.runZip(name, this, argument, problemBuilder); } @@ -198,10 +200,10 @@ public Storage iif(Value when_true, Value when_false, StorageType resultStora private static IntFunction makeRowProvider(Value value) { if (value.isHostObject() && value.asHostObject() instanceof Storage s) { - return i->(Object)s.getItemBoxed(i); + return i -> (Object) s.getItemBoxed(i); } var converted = Polyglot_Utils.convertPolyglotValue(value); - return i->converted; + return i -> converted; } private static MapOpStorage buildOps() { @@ -217,7 +219,8 @@ protected BoolStorage run(BoolStorage storage) { .add( new MapOperation<>(Maps.EQ) { @Override - public BoolStorage runMap(BoolStorage storage, Object arg, MapOperationProblemBuilder problemBuilder) { + public BoolStorage runMap( + BoolStorage storage, Object arg, MapOperationProblemBuilder problemBuilder) { if (arg == null) { return BoolStorage.makeEmpty(storage.size); } else if (arg instanceof Boolean v) { @@ -233,7 +236,8 @@ public BoolStorage runMap(BoolStorage storage, Object arg, MapOperationProblemBu } @Override - public BoolStorage runZip(BoolStorage storage, Storage arg, MapOperationProblemBuilder 
problemBuilder) { + public BoolStorage runZip( + BoolStorage storage, Storage arg, MapOperationProblemBuilder problemBuilder) { BitSet out = new BitSet(); BitSet missing = new BitSet(); for (int i = 0; i < storage.size; i++) { @@ -251,7 +255,8 @@ public BoolStorage runZip(BoolStorage storage, Storage arg, MapOperationProbl .add( new MapOperation<>(Maps.AND) { @Override - public BoolStorage runMap(BoolStorage storage, Object arg, MapOperationProblemBuilder problemBuilder) { + public BoolStorage runMap( + BoolStorage storage, Object arg, MapOperationProblemBuilder problemBuilder) { if (arg == null) { return BoolStorage.makeEmpty(storage.size); } else if (arg instanceof Boolean v) { @@ -266,7 +271,8 @@ public BoolStorage runMap(BoolStorage storage, Object arg, MapOperationProblemBu } @Override - public BoolStorage runZip(BoolStorage storage, Storage arg, MapOperationProblemBuilder problemBuilder) { + public BoolStorage runZip( + BoolStorage storage, Storage arg, MapOperationProblemBuilder problemBuilder) { if (arg instanceof BoolStorage v) { BitSet missing = v.isMissing.get(0, storage.size); missing.or(storage.isMissing); @@ -295,7 +301,8 @@ public BoolStorage runZip(BoolStorage storage, Storage arg, MapOperationProbl .add( new MapOperation<>(Maps.OR) { @Override - public BoolStorage runMap(BoolStorage storage, Object arg, MapOperationProblemBuilder problemBuilder) { + public BoolStorage runMap( + BoolStorage storage, Object arg, MapOperationProblemBuilder problemBuilder) { if (arg == null) { return BoolStorage.makeEmpty(storage.size); } else if (arg instanceof Boolean v) { @@ -310,7 +317,8 @@ public BoolStorage runMap(BoolStorage storage, Object arg, MapOperationProblemBu } @Override - public BoolStorage runZip(BoolStorage storage, Storage arg, MapOperationProblemBuilder problemBuilder) { + public BoolStorage runZip( + BoolStorage storage, Storage arg, MapOperationProblemBuilder problemBuilder) { if (arg instanceof BoolStorage v) { BitSet missing = v.isMissing.get(0, 
storage.size); missing.or(storage.isMissing); @@ -391,4 +399,51 @@ public BoolStorage slice(List ranges) { return new BoolStorage(newValues, newMissing, newSize, negated); } + + @Override + public Storage cast(StorageType targetType, CastProblemBuilder castProblemBuilder) { + return switch (targetType) { + case AnyObjectType any -> + new MixedStorageFacade(this); + case BooleanType booleanType -> + this; + case FloatType floatType -> { + int n = size(); + NumericBuilder builder = NumericBuilder.createDoubleBuilder(n); + for (int i = 0; i < n; i++) { + if (isNa(i)) { + builder.appendNulls(1); + } else { + builder.appendDouble(values.get(i) ? 1.0 : 0.0); + } + } + yield builder.seal(); + } + case IntegerType integerType -> { + int n = size(); + NumericBuilder builder = NumericBuilder.createLongBuilder(n); + for (int i = 0; i < n; i++) { + if (isNa(i)) { + builder.appendNulls(1); + } else { + builder.appendLong(values.get(i) ? 1 : 0); + } + } + yield builder.seal(); + } + case TextType textType -> { + int n = size(); + StringBuilder builder = new StringBuilder(n); + for (int i = 0; i < n; i++) { + if (isMissing.get(i)) { + builder.appendNulls(1); + } else { + builder.append(values.get(i) ? 
"True" : "False"); + } + } + yield StringStorage.adapt(builder.seal(), textType); + } + default -> throw new IllegalStateException("Conversion of BoolStorage to " + targetType + " is not supported"); + }; + } } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/DateStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/DateStorage.java index 9548ef77d13f..57c28b19a7bf 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/DateStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/DateStorage.java @@ -1,14 +1,23 @@ package org.enso.table.data.column.storage; import java.time.LocalDate; +import java.time.LocalTime; +import java.time.ZoneId; +import java.time.ZonedDateTime; +import org.enso.polyglot.common_utils.Core_Date_Utils; import org.enso.table.data.column.builder.object.Builder; import org.enso.table.data.column.builder.object.DateBuilder; +import org.enso.table.data.column.builder.object.DateTimeBuilder; +import org.enso.table.data.column.builder.object.StringBuilder; +import org.enso.table.data.column.operation.CastProblemBuilder; import org.enso.table.data.column.operation.map.MapOpStorage; import org.enso.table.data.column.operation.map.UnaryIntegerOp; import org.enso.table.data.column.operation.map.datetime.DateTimeIsInOp; +import org.enso.table.data.column.storage.type.DateTimeType; import org.enso.table.data.column.storage.type.DateType; import org.enso.table.data.column.storage.type.StorageType; +import org.enso.table.data.column.storage.type.TextType; public final class DateStorage extends SpecializedStorage { /** @@ -67,4 +76,37 @@ public StorageType getType() { public Builder createDefaultBuilderOfSameType(int capacity) { return new DateBuilder(capacity); } + + @Override + public Storage cast(StorageType targetType, CastProblemBuilder castProblemBuilder) { + if (targetType instanceof DateTimeType) { + int n = size(); + DateTimeBuilder builder = new 
DateTimeBuilder(n); + for (int i = 0; i < n; i++) { + LocalDate date = data[i]; + if (date == null) { + builder.appendNulls(1); + } else { + ZonedDateTime converted = date.atStartOfDay().atZone(ZoneId.systemDefault()); + builder.append(converted); + } + } + return builder.seal(); + } else if (targetType instanceof TextType textType) { + int n = size(); + StringBuilder builder = new StringBuilder(n); + var formatter = Core_Date_Utils.defaultLocalDateFormatter(); + for (int i = 0; i < n; i++) { + LocalDate item = data[i]; + if (item == null) { + builder.appendNulls(1); + } else { + builder.append(item.format(formatter)); + } + } + return StringStorage.adapt(builder.seal(), textType); + } else { + return super.cast(targetType, castProblemBuilder); + } + } } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/DateTimeStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/DateTimeStorage.java index 126b1f919a29..e0fe4f98642f 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/DateTimeStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/DateTimeStorage.java @@ -1,13 +1,20 @@ package org.enso.table.data.column.storage; +import org.enso.polyglot.common_utils.Core_Date_Utils; import org.enso.table.data.column.builder.object.Builder; +import org.enso.table.data.column.builder.object.DateBuilder; import org.enso.table.data.column.builder.object.DateTimeBuilder; +import org.enso.table.data.column.builder.object.StringBuilder; +import org.enso.table.data.column.builder.object.TimeOfDayBuilder; +import org.enso.table.data.column.operation.CastProblemBuilder; import org.enso.table.data.column.operation.map.MapOpStorage; import org.enso.table.data.column.operation.map.UnaryIntegerOp; import org.enso.table.data.column.operation.map.datetime.DateTimeIsInOp; -import org.enso.table.data.column.storage.type.DateTimeType; -import org.enso.table.data.column.storage.type.StorageType; 
+import org.enso.table.data.column.storage.type.*; +import org.enso.table.formatting.DateTimeFormatter; +import java.time.LocalDate; +import java.time.LocalTime; import java.time.ZonedDateTime; public final class DateTimeStorage extends SpecializedStorage { @@ -69,4 +76,50 @@ public StorageType getType() { public Builder createDefaultBuilderOfSameType(int capacity) { return new DateTimeBuilder(capacity); } + + @Override + public Storage cast(StorageType targetType, CastProblemBuilder castProblemBuilder) { + if (targetType instanceof DateType) { + int n = size(); + DateBuilder builder = new DateBuilder(n); + for (int i = 0; i < n; i++) { + ZonedDateTime dateTime = data[i]; + if (dateTime == null) { + builder.appendNulls(1); + } else { + LocalDate converted = dateTime.toLocalDate(); + builder.append(converted); + } + } + return builder.seal(); + } else if (targetType instanceof TimeOfDayType) { + int n = size(); + TimeOfDayBuilder builder = new TimeOfDayBuilder(n); + for (int i = 0; i < n; i++) { + ZonedDateTime dateTime = data[i]; + if (dateTime == null) { + builder.appendNulls(1); + } else { + LocalTime converted = dateTime.toLocalTime(); + builder.append(converted); + } + } + return builder.seal(); + } else if (targetType instanceof TextType textType) { + int n = size(); + StringBuilder builder = new StringBuilder(n); + var formatter = Core_Date_Utils.defaultZonedDateTimeFormatter(); + for (int i = 0; i < n; i++) { + ZonedDateTime item = data[i]; + if (item == null) { + builder.appendNulls(1); + } else { + builder.append(item.format(formatter)); + } + } + return StringStorage.adapt(builder.seal(), textType); + } else { + return super.cast(targetType, castProblemBuilder); + } + } } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/DoubleStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/DoubleStorage.java index abeb15dc7e69..081ef613aff0 100644 --- 
a/std-bits/table/src/main/java/org/enso/table/data/column/storage/DoubleStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/DoubleStorage.java @@ -1,24 +1,27 @@ package org.enso.table.data.column.storage; -import java.util.BitSet; -import java.util.List; - import org.enso.table.data.column.builder.object.Builder; import org.enso.table.data.column.builder.object.NumericBuilder; +import org.enso.table.data.column.builder.object.StringBuilder; +import org.enso.table.data.column.operation.CastProblemBuilder; import org.enso.table.data.column.operation.map.MapOpStorage; import org.enso.table.data.column.operation.map.MapOperationProblemBuilder; import org.enso.table.data.column.operation.map.UnaryMapOperation; import org.enso.table.data.column.operation.map.numeric.DoubleBooleanOp; import org.enso.table.data.column.operation.map.numeric.DoubleIsInOp; import org.enso.table.data.column.operation.map.numeric.DoubleNumericOp; -import org.enso.table.data.column.storage.type.FloatType; -import org.enso.table.data.column.storage.type.StorageType; +import org.enso.table.data.column.storage.type.*; import org.enso.table.data.index.Index; import org.enso.table.data.mask.OrderMask; import org.enso.table.data.mask.SliceRange; import org.graalvm.polyglot.Value; -/** A column containing floating point numbers. */ +import java.util.BitSet; +import java.util.List; + +/** + * A column containing floating point numbers. + */ public final class DoubleStorage extends NumericStorage { private final long[] data; private final BitSet isMissing; @@ -28,8 +31,7 @@ public final class DoubleStorage extends NumericStorage { /** * @param data the underlying data * @param size the number of items stored - * @param isMissing a bit set denoting at index {@code i} whether or not the value at index {@code - * i} is missing. + * @param isMissing a bit set denoting at index {@code i} whether the value at index {@code i} is missing. 
*/ public DoubleStorage(long[] data, int size, BitSet isMissing) { this.data = data; @@ -43,13 +45,17 @@ public static DoubleStorage makeEmpty(int size) { return new DoubleStorage(new long[0], size, isMissing); } - /** @inheritDoc */ + /** + * @inheritDoc + */ @Override public int size() { return size; } - /** @inheritDoc */ + /** + * @inheritDoc + */ @Override public int countMissing() { return isMissing.cardinality(); @@ -73,13 +79,17 @@ public Double getItemBoxed(int idx) { return isMissing.get(idx) ? null : Double.longBitsToDouble(data[idx]); } - /** @inheritDoc */ + /** + * @inheritDoc + */ @Override public StorageType getType() { return FloatType.FLOAT_64; } - /** @inheritDoc */ + /** + * @inheritDoc + */ @Override public boolean isNa(long idx) { return isMissing.get((int) idx); @@ -357,4 +367,46 @@ public Storage slice(List ranges) { return new DoubleStorage(newData, newSize, newMissing); } + + @Override + public Storage cast(StorageType targetType, CastProblemBuilder castProblemBuilder) { + return switch (targetType) { + case AnyObjectType any -> new MixedStorageFacade(this); + case FloatType floatType -> this; + case IntegerType integerType -> { + int n = size(); + NumericBuilder builder = NumericBuilder.createLongBuilder(n); + double min = (double) integerType.getMinValue(); + double max = (double) integerType.getMaxValue(); + for (int i = 0; i < n; i++) { + if (isMissing.get(i)) { + builder.appendNulls(1); + } else { + double value = getItem(i); + if (value < min || value > max) { + builder.appendNulls(1); + castProblemBuilder.reportLossyConversion(); + } else { + long converted = (long) value; + builder.appendLong(converted); + } + } + } + yield builder.seal(); + } + case TextType textType -> { + int n = size(); + StringBuilder builder = new StringBuilder(n); + for (int i = 0; i < n; i++) { + if (isMissing.get(i)) { + builder.appendNulls(1); + } else { + builder.append(Double.toString(getItem(i))); + } + } + yield StringStorage.adapt(builder.seal(), 
textType); + } + default -> throw new IllegalStateException("Conversion of DoubleStorage to " + targetType + " is not supported"); + }; + } } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/LongStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/LongStorage.java index b3c5bae93a33..a261224e99d4 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/LongStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/LongStorage.java @@ -3,14 +3,15 @@ import org.enso.base.polyglot.NumericConverter; import org.enso.table.data.column.builder.object.Builder; import org.enso.table.data.column.builder.object.NumericBuilder; +import org.enso.table.data.column.builder.object.StringBuilder; +import org.enso.table.data.column.operation.CastProblemBuilder; import org.enso.table.data.column.operation.map.MapOpStorage; import org.enso.table.data.column.operation.map.MapOperationProblemBuilder; import org.enso.table.data.column.operation.map.UnaryMapOperation; import org.enso.table.data.column.operation.map.numeric.LongBooleanOp; import org.enso.table.data.column.operation.map.numeric.LongIsInOp; import org.enso.table.data.column.operation.map.numeric.LongNumericOp; -import org.enso.table.data.column.storage.type.IntegerType; -import org.enso.table.data.column.storage.type.StorageType; +import org.enso.table.data.column.storage.type.*; import org.enso.table.data.index.Index; import org.enso.table.data.mask.OrderMask; import org.enso.table.data.mask.SliceRange; @@ -19,7 +20,9 @@ import java.util.BitSet; import java.util.List; -/** A column storing 64-bit integers. */ +/** + * A column storing 64-bit integers. 
+ */ public final class LongStorage extends NumericStorage { // TODO [RW] at some point we will want to add separate storage classes for byte, short and int, // for more compact storage and more efficient handling of smaller integers; for now we will be @@ -30,10 +33,10 @@ public final class LongStorage extends NumericStorage { private static final MapOpStorage ops = buildOps(); /** - * @param data the underlying data - * @param size the number of items stored + * @param data the underlying data + * @param size the number of items stored * @param isMissing a bit set denoting at index {@code i} whether or not the value at index {@code - * i} is missing. + * i} is missing. */ public LongStorage(long[] data, int size, BitSet isMissing) { this.data = data; @@ -51,13 +54,17 @@ public LongStorage(long[] data) { this(data, data.length, new BitSet()); } - /** @inheritDoc */ + /** + * @inheritDoc + */ @Override public int size() { return size; } - /** @inheritDoc */ + /** + * @inheritDoc + */ @Override public int countMissing() { return isMissing.cardinality(); @@ -81,14 +88,18 @@ public Long getItemBoxed(int idx) { return isMissing.get(idx) ? 
null : data[idx]; } - /** @inheritDoc */ + /** + * @inheritDoc + */ @Override public StorageType getType() { - // TODO add possibility to set integer bit limit + // TODO add possibility to set integer bit limit (#5159) return IntegerType.INT_64; } - /** @inheritDoc */ + /** + * @inheritDoc + */ @Override public boolean isNa(long idx) { return isMissing.get((int) idx); @@ -446,4 +457,38 @@ public LongStorage slice(List ranges) { return new LongStorage(newData, newSize, newMissing); } + + @Override + public Storage cast(StorageType targetType, CastProblemBuilder castProblemBuilder) { + return switch (targetType) { + case AnyObjectType any -> new MixedStorageFacade(this); + case IntegerType integerType -> this; + case FloatType floatType -> { + int n = size(); + NumericBuilder builder = NumericBuilder.createDoubleBuilder(n); + for (int i = 0; i < n; i++) { + if (isNa(i)) { + builder.appendNulls(1); + } else { + double converted = (double) getItem(i); + builder.appendDouble(converted); + } + } + yield builder.seal(); + } + case TextType textType -> { + int n = size(); + StringBuilder builder = new StringBuilder(n); + for (int i = 0; i < n; i++) { + if (isMissing.get(i)) { + builder.appendNulls(1); + } else { + builder.append(Long.toString(getItem(i))); + } + } + yield StringStorage.adapt(builder.seal(), textType); + } + default -> throw new IllegalStateException("Conversion of LongStorage to " + targetType + " is not supported"); + }; + } } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/MixedStorageFacade.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/MixedStorageFacade.java index 1cbcdb4c8146..e1aba54db7e9 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/MixedStorageFacade.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/MixedStorageFacade.java @@ -1,6 +1,7 @@ package org.enso.table.data.column.storage; import org.enso.table.data.column.builder.object.Builder; 
+import org.enso.table.data.column.operation.CastProblemBuilder; import org.enso.table.data.column.operation.map.MapOperationProblemBuilder; import org.enso.table.data.column.storage.type.AnyObjectType; import org.enso.table.data.column.storage.type.StorageType; @@ -99,4 +100,9 @@ public Storage slice(List ranges) { Storage newStorage = underlyingStorage.slice(ranges); return new MixedStorageFacade(newStorage); } + + @Override + public Storage cast(StorageType targetType, CastProblemBuilder castProblemBuilder) { + return null; + } } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/ObjectStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/ObjectStorage.java index e49b4e8567e8..5f10ee52a0e2 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/ObjectStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/ObjectStorage.java @@ -4,10 +4,12 @@ import org.enso.table.data.column.builder.object.Builder; import org.enso.table.data.column.builder.object.ObjectBuilder; +import org.enso.table.data.column.builder.object.StringBuilder; import org.enso.table.data.column.operation.map.MapOpStorage; import org.enso.table.data.column.operation.map.UnaryMapOperation; import org.enso.table.data.column.storage.type.AnyObjectType; import org.enso.table.data.column.storage.type.StorageType; +import org.enso.table.data.column.storage.type.TextType; /** A column storing arbitrary objects. 
*/ public final class ObjectStorage extends SpecializedStorage { diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/SpecializedStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/SpecializedStorage.java index 5a30c4e4750c..8d7774a39bb4 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/SpecializedStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/SpecializedStorage.java @@ -2,9 +2,14 @@ import java.util.BitSet; import java.util.List; + +import org.enso.table.data.column.builder.object.StringBuilder; +import org.enso.table.data.column.operation.CastProblemBuilder; import org.enso.table.data.column.operation.map.MapOpStorage; import org.enso.table.data.column.operation.map.MapOperationProblemBuilder; +import org.enso.table.data.column.storage.type.AnyObjectType; import org.enso.table.data.column.storage.type.StorageType; +import org.enso.table.data.column.storage.type.TextType; import org.enso.table.data.index.Index; import org.enso.table.data.mask.OrderMask; import org.enso.table.data.mask.SliceRange; @@ -149,4 +154,27 @@ public SpecializedStorage slice(List ranges) { return newInstance(newData, newSize); } + + @Override + public Storage cast(StorageType targetType, CastProblemBuilder castProblemBuilder) { + if (targetType == getType()) { + return this; + } else if (targetType instanceof AnyObjectType) { + return new MixedStorageFacade(this); + } else if (targetType instanceof TextType textType) { + int n = size(); + StringBuilder builder = new StringBuilder(n); + for (int i = 0; i < n; i++) { + Object item = data[i]; + if (item == null) { + builder.appendNulls(1); + } else { + builder.append(item.toString()); + } + } + return StringStorage.adapt(builder.seal(), textType); + } else { + throw new IllegalStateException("Conversion of " + this.getClass().getSimpleName() + " to " + targetType + " is not supported"); + } + } } diff --git 
a/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java index 99fbaf33b17b..fb5875d351e6 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java @@ -4,6 +4,7 @@ import org.enso.table.data.column.builder.object.Builder; import org.enso.table.data.column.builder.object.InferredBuilder; import org.enso.table.data.column.builder.object.ObjectBuilder; +import org.enso.table.data.column.operation.CastProblemBuilder; import org.enso.table.data.column.operation.map.MapOperationProblemBuilder; import org.enso.table.data.column.storage.type.StorageType; import org.enso.table.data.mask.OrderMask; @@ -345,4 +346,6 @@ public Storage duplicateCount() { } return new LongStorage(data); } + + public abstract Storage cast(StorageType targetType, CastProblemBuilder castProblemBuilder); } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/StringStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/StringStorage.java index 7548ff2fda26..5b047f66d054 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/StringStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/StringStorage.java @@ -1,10 +1,10 @@ package org.enso.table.data.column.storage; import java.util.BitSet; - import org.enso.base.Text_Utils; import org.enso.table.data.column.builder.object.Builder; import org.enso.table.data.column.builder.object.StringBuilder; +import org.enso.table.data.column.operation.CastProblemBuilder; import org.enso.table.data.column.operation.map.MapOpStorage; import org.enso.table.data.column.operation.map.MapOperation; import org.enso.table.data.column.operation.map.MapOperationProblemBuilder; @@ -13,6 +13,7 @@ import org.enso.table.data.column.operation.map.text.StringBooleanOp; 
import org.enso.table.data.column.operation.map.text.StringIsInOp; import org.enso.table.data.column.operation.map.text.StringStringOp; +import org.enso.table.data.column.storage.type.AnyObjectType; import org.enso.table.data.column.storage.type.StorageType; import org.enso.table.data.column.storage.type.TextType; import org.graalvm.polyglot.Value; @@ -47,12 +48,14 @@ public StorageType getType() { private static final MapOpStorage> ops = buildOps(); @Override - protected Storage runVectorizedMap(String name, Object argument, MapOperationProblemBuilder problemBuilder) { + protected Storage runVectorizedMap( + String name, Object argument, MapOperationProblemBuilder problemBuilder) { return ops.runMap(name, this, argument, problemBuilder); } @Override - protected Storage runVectorizedZip(String name, Storage argument, MapOperationProblemBuilder problemBuilder) { + protected Storage runVectorizedZip( + String name, Storage argument, MapOperationProblemBuilder problemBuilder) { return ops.runZip(name, this, argument, problemBuilder); } @@ -70,12 +73,24 @@ public Builder createDefaultBuilderOfSameType(int capacity) { return new StringBuilder(capacity); } + @Override + public Storage cast(StorageType targetType, CastProblemBuilder castProblemBuilder) { + return switch (targetType) { + case AnyObjectType any -> new MixedStorageFacade(this); + case TextType textType -> adapt(this, textType); + default -> throw new IllegalStateException("Conversion of StringStorage to " + targetType + " is not supported"); + }; + } + private static MapOpStorage> buildOps() { MapOpStorage> t = ObjectStorage.buildObjectOps(); t.add( new MapOperation<>(Maps.EQ) { @Override - public BoolStorage runMap(SpecializedStorage storage, Object arg, MapOperationProblemBuilder problemBuilder) { + public BoolStorage runMap( + SpecializedStorage storage, + Object arg, + MapOperationProblemBuilder problemBuilder) { BitSet r = new BitSet(); BitSet missing = new BitSet(); for (int i = 0; i < storage.size(); 
i++) { @@ -89,7 +104,10 @@ public BoolStorage runMap(SpecializedStorage storage, Object arg, MapOpe } @Override - public BoolStorage runZip(SpecializedStorage storage, Storage arg, MapOperationProblemBuilder problemBuilder) { + public BoolStorage runZip( + SpecializedStorage storage, + Storage arg, + MapOperationProblemBuilder problemBuilder) { BitSet r = new BitSet(); BitSet missing = new BitSet(); for (int i = 0; i < storage.size(); i++) { @@ -140,12 +158,28 @@ protected boolean doString(String a, String b) { }); t.add(new LikeOp()); t.add(new StringIsInOp<>()); - t.add(new StringStringOp(Maps.ADD) { - @Override - protected String doString(String a, String b) { - return a + b; - } - }); + t.add( + new StringStringOp(Maps.ADD) { + @Override + protected String doString(String a, String b) { + return a + b; + } + }); return t; } + + /** + * A helper method that can be used to adapt a variable length storage to a target type that may + * potentially be fixed length. + * + *

<p>It will ensure that the values are trimmed or padded wherever necessary. + */ + public static Storage adapt(Storage storage, TextType type) { + if (type.fixedLength()) { + // TODO [RW] #5159 + throw new IllegalStateException("Fixed length conversion is currently not supported."); + } else { + return storage; + } + } } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/TimeOfDayStorage.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/TimeOfDayStorage.java index 6944a55b920c..53f2c24a6ccf 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/TimeOfDayStorage.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/TimeOfDayStorage.java @@ -1,12 +1,18 @@ package org.enso.table.data.column.storage; import java.time.LocalTime; +import java.time.ZonedDateTime; +import org.enso.polyglot.common_utils.Core_Date_Utils; import org.enso.table.data.column.builder.object.Builder; +import org.enso.table.data.column.builder.object.StringBuilder; import org.enso.table.data.column.builder.object.TimeOfDayBuilder; +import org.enso.table.data.column.operation.CastProblemBuilder; import org.enso.table.data.column.operation.map.MapOpStorage; import org.enso.table.data.column.operation.map.datetime.DateTimeIsInOp; +import org.enso.table.data.column.storage.type.AnyObjectType; import org.enso.table.data.column.storage.type.StorageType; +import org.enso.table.data.column.storage.type.TextType; import org.enso.table.data.column.storage.type.TimeOfDayType; public final class TimeOfDayStorage extends SpecializedStorage { @@ -45,4 +51,24 @@ public StorageType getType() { public Builder createDefaultBuilderOfSameType(int capacity) { return new TimeOfDayBuilder(capacity); } + + @Override + public Storage cast(StorageType targetType, CastProblemBuilder castProblemBuilder) { + if (targetType instanceof TextType textType) { + int n = size(); + StringBuilder builder = new StringBuilder(n); + var formatter = 
Core_Date_Utils.defaultLocalTimeFormatter(); + for (int i = 0; i < n; i++) { + LocalTime item = data[i]; + if (item == null) { + builder.appendNulls(1); + } else { + builder.append(item.format(formatter)); + } + } + return StringStorage.adapt(builder.seal(), textType); + } else { + return super.cast(targetType, castProblemBuilder); + } + } } diff --git a/std-bits/table/src/main/java/org/enso/table/data/column/storage/type/IntegerType.java b/std-bits/table/src/main/java/org/enso/table/data/column/storage/type/IntegerType.java index 73b5b0432545..df0239e86633 100644 --- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/type/IntegerType.java +++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/type/IntegerType.java @@ -2,4 +2,22 @@ public record IntegerType(Bits bits) implements StorageType { public static final IntegerType INT_64 = new IntegerType(Bits.BITS_64); + + public long getMaxValue() { + return switch (bits) { + case BITS_8 -> Byte.MAX_VALUE; + case BITS_16 -> Short.MAX_VALUE; + case BITS_32 -> Integer.MAX_VALUE; + case BITS_64 -> Long.MAX_VALUE; + }; + } + + public long getMinValue() { + return switch (bits) { + case BITS_8 -> Byte.MIN_VALUE; + case BITS_16 -> Short.MIN_VALUE; + case BITS_32 -> Integer.MIN_VALUE; + case BITS_64 -> Long.MIN_VALUE; + }; + } } diff --git a/test/Table_Tests/data/.gitignore b/test/Table_Tests/data/.gitignore index 64d2219dcf68..f3e86100a5f9 100644 --- a/test/Table_Tests/data/.gitignore +++ b/test/Table_Tests/data/.gitignore @@ -1,2 +1,4 @@ *.bak *.db +spreadsheet.xls +spreadsheet.xlsx diff --git a/test/Table_Tests/src/Common_Table_Operations/Cast_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Cast_Spec.enso deleted file mode 100644 index ed420d2287ae..000000000000 --- a/test/Table_Tests/src/Common_Table_Operations/Cast_Spec.enso +++ /dev/null @@ -1,104 +0,0 @@ -from Standard.Base import all - -from Standard.Table import Value_Type -import Standard.Table.Data.Type.Value_Type.Bits - -from 
Standard.Test import Test, Problems -import Standard.Test.Extensions - -from project.Common_Table_Operations.Util import run_default_backend - -main = run_default_backend spec - -spec setup = - prefix = setup.prefix - table_builder = setup.table_builder - materialize = setup.materialize - # TODO this spec will be expanded in #6112 - Test.group prefix+"Column.cast" pending=(if setup.is_database.not then "Cast is not implemented in the in-memory backend yet.") <| - Test.specify "should allow to cast an integer column to text" <| - t = table_builder [["X", [1, 2, 3000]]] - c = t.at "X" . cast Value_Type.Char - c.value_type.is_text . should_be_true - c.to_vector . should_equal ["1", "2", "3000"] - - Test.specify "should allow to cast a boolean column to integer" <| - t = table_builder [["X", [True, False, True]]] - c = t.at "X" . cast Value_Type.Integer - c.value_type.is_integer . should_be_true - c.to_vector . should_equal [1, 0, 1] - - Test.specify "should allow to cast a boolean column to text" pending="TODO: sqlite has issue with this, figure out in #6112" <| - t = table_builder [["X", [True, False, True]]] - c = t.at "X" . cast Value_Type.Char - c.value_type.is_text . should_be_true - c.to_vector . should_equal ["true", "false", "true"] - - Test.specify "should allow to cast a text column to fixed-length" pending=(if setup.test_selection.fixed_length_text_columns.not then "Fixed-length Char columns are not supported by this backend.") <| - t = table_builder [["X", ["a", "DEF", "a slightly longer text"]]] - c = t.at "X" . cast (Value_Type.Char size=3 variable_length=False) - c.value_type . should_equal (Value_Type.Char size=3 variable_length=False) - c.to_vector . should_equal ["a ", "DEF", "a s"] - - Test.specify "should work if the first row is NULL" <| - t = table_builder [["X", [Nothing, 1, 2, 3000]], ["Y", [Nothing, True, False, True]]] - - c1 = t.at "X" . cast Value_Type.Char - c1.value_type.is_text . should_be_true - c1.to_vector . 
should_equal [Nothing, "1", "2", "3000"] - - c2 = t.at "Y" . cast Value_Type.Integer - c2.value_type.is_integer . should_be_true - c2.to_vector . should_equal [Nothing, 1, 0, 1] - - Test.specify "should not lose the type after further operations were performed on the result" <| - t = table_builder [["X", [1, 2, 3000]], ["Y", [True, False, True]]] - c1 = t.at "X" . cast Value_Type.Char - c2 = t.at "Y" . cast Value_Type.Integer - - c3 = c1 + '_suffix' - c3.value_type.is_text . should_be_true - c3.to_vector . should_equal ["1_suffix", "2_suffix", "3000_suffix"] - - c4 = c2 + 1000 - c4.value_type.is_integer . should_be_true - c4.to_vector . should_equal [1001, 1000, 1001] - - Test.specify "should not lose the type after further operations were performed on the result, even if the first row is NULL" <| - t = table_builder [["X", [Nothing, 1, 2, 3000]], ["Y", [Nothing, True, False, True]]] - c1 = t.at "X" . cast Value_Type.Char - c2 = t.at "Y" . cast Value_Type.Integer - - c3 = c1 + '_suffix' - c3.value_type.is_text . should_be_true - c3.to_vector . should_equal [Nothing, "1_suffix", "2_suffix", "3000_suffix"] - - c4 = c2 + 1000 - c4.value_type.is_integer . should_be_true - c4.to_vector . should_equal [Nothing, 1001, 1000, 1001] - - Test.group prefix+"Table.cast" pending=(if setup.is_database.not then "Cast is not implemented in the in-memory backend yet.") <| - Test.specify 'should cast the columns "in-place" and not reorder them' <| - t = table_builder [["X", [1, 2, 3000]], ["Y", [4, 5, 6]], ["Z", [7, 8, 9]], ["A", [True, False, True]]] - t2 = t.cast ["Z", "Y"] Value_Type.Char - t2.column_names . should_equal ["X", "Y", "Z", "A"] - - t2.at "X" . value_type . is_integer . should_be_true - t2.at "Y" . value_type . is_text . should_be_true - t2.at "Z" . value_type . is_text . should_be_true - t2.at "A" . value_type . is_boolean . should_be_true - - t2.at "X" . to_vector . should_equal [1, 2, 3000] - t2.at "Y" . to_vector . should_equal ["4", "5", "6"] - t2.at "Z" . 
to_vector . should_equal ["7", "8", "9"] - t2.at "A" . to_vector . should_equal [True, False, True] - - if setup.test_selection.fixed_length_text_columns then - Test.specify "should preserve the overridden types when materialized" pending="TODO: #5159 needed" <| - t = table_builder [["X", [1, 2, 100]], ["Y", ["a", "abcdef", "abc"]]] - t2 = t . cast "X" (Value_Type.Integer Bits.Bits_16) . cast "Y" (Value_Type.Char size=3 variable_length=False) - - t3 = materialize t2 - t3.at "X" . value_type . should_equal (t2.at "X" . value_type) - t3.at "Y" . value_type . should_equal (Value_Type.Char size=3 variable_length=False) - t3.at "Y" . to_vector . should_equal ["a ", "abc", "abc"] diff --git a/test/Table_Tests/src/Common_Table_Operations/Conversion_Spec.enso b/test/Table_Tests/src/Common_Table_Operations/Conversion_Spec.enso new file mode 100644 index 000000000000..431017fdcaea --- /dev/null +++ b/test/Table_Tests/src/Common_Table_Operations/Conversion_Spec.enso @@ -0,0 +1,323 @@ +from Standard.Base import all +import Standard.Base.Errors.Illegal_Argument.Illegal_Argument + +from Standard.Table import Value_Type +import Standard.Table.Data.Type.Value_Type.Bits + +from Standard.Table.Errors import Conversion_Failure +from Standard.Database.Errors import Unsupported_Database_Operation + +from Standard.Test import Test, Problems +import Standard.Test.Extensions + +from project.Common_Table_Operations.Util import run_default_backend + +polyglot java import java.lang.Long as Java_Long + +main = run_default_backend spec + +type My_Type + Value x + + to_text : Text + to_text self = "{{{MY Type [x="+self.x.to_text+"] }}}" + +spec setup = + prefix = setup.prefix + table_builder = setup.table_builder + materialize = setup.materialize + supports_dates = setup.test_selection.date_time + Test.group prefix+"Table/Column.cast - to text" <| + Test.specify "should allow to cast columns of various basic types to text" <| + t = table_builder [["X", [1, 2, 3000]], ["Y", [True, False, True]], 
["Z", [1.5, 0.125, -2.5]], ["W", ["a", "DEF", "a slightly longer text"]]] + t2 = t.cast t.column_names Value_Type.Char + t2.at "X" . value_type . is_text . should_be_true + t2.at "Y" . value_type . is_text . should_be_true + t2.at "Z" . value_type . is_text . should_be_true + t2.at "W" . value_type . is_text . should_be_true + + t2.at "X" . to_vector . should_equal ["1", "2", "3000"] + # Depending on the backend, the case of True/False literals may differ. + t2.at "Y" . to_vector . map (_.to_case Case.Lower) . should_equal ["true", "false", "true"] + t2.at "Z" . to_vector . should_equal ["1.5", "0.125", "-2.5"] + t2.at "W" . to_vector . should_equal ["a", "DEF", "a slightly longer text"] + + if supports_dates then + Test.specify "should allow to cast date/time columns to text" <| + t = table_builder [["X", [Date.new 2015 1 1, Date.new 2023 12 31]], ["Y", [Time_Of_Day.new 1 2 3, Time_Of_Day.new 23 57 59]], ["Z", [Date_Time.new 2015 1 1 1 2 3, Date_Time.new 2023 11 30 22 45 44]]] + t2 = t.cast t.column_names Value_Type.Char + t2.at "X" . value_type . is_text . should_be_true + t2.at "Y" . value_type . is_text . should_be_true + t2.at "Z" . value_type . is_text . should_be_true + + t2.at "X" . to_vector . should_equal ["2015-01-01", "2023-12-31"] + t2.at "Y" . to_vector . should_equal ["01:02:03", "23:57:59"] + # The particular format depends on the backend. + vz = t2.at "Z" . to_vector + vz.first . should_contain "2015-01-01" + vz.first . should_contain "01:02:03" + vz.second . should_contain "2023-11-30" + vz.second . should_contain "22:45:44" + + if setup.is_database.not then + Test.specify "should allow to cast a column of objects to text" <| + t = table_builder [["X", [My_Type.Value 42, My_Type.Value "X"]]] + c = t.at "X" . cast Value_Type.Char + c.value_type.is_text . should_be_true + c.to_vector . 
should_equal ["{{{MY Type [x=42] }}}", "{{{MY Type [x=X] }}}"] + + if setup.test_selection.fixed_length_text_columns then + Test.specify "should allow to cast a text column to fixed-length" <| + t = table_builder [["X", ["a", "DEF", "a slightly longer text"]]] + c = t.at "X" . cast (Value_Type.Char size=3 variable_length=False) + c.value_type . should_equal (Value_Type.Char size=3 variable_length=False) + c.to_vector . should_equal ["a ", "DEF", "a s"] + + # No Conversion_Failure warning here, because we started with text, so it was expected we will trim it if needed. + Problems.assume_no_problems c + + Test.specify "should allow casting a non-text column to fixed-length text" <| + t = table_builder [["X", [1, 22, 333]]] + c = t.at "X" . cast (Value_Type.Char size=3 variable_length=False) + c.value_type . should_equal (Value_Type.Char size=3 variable_length=False) + c.to_vector . should_equal ["1 ", "22 ", "333"] + Problems.assume_no_problems c + + Test.specify "should warn when losing data if the fixed-length text length is too short to fit the data" pending=(if setup.is_database then "Conversion_Failure is not supported in Database yet.") <| + t = table_builder [["X", [15, 1000000, 123456]]] + c = t.at "X" . cast (Value_Type.Char size=3 variable_length=False) + c.value_type . should_equal (Value_Type.Char size=3 variable_length=False) + c.to_vector . should_equal ["15 ", "100", "123"] + Problems.expect_warning Conversion_Failure c + + Test.group prefix+"Table/Column.cast - numeric" <| + Test.specify "should allow to cast a boolean column to integer" <| + t = table_builder [["X", [True, False, True]]] + c = t.at "X" . cast Value_Type.Integer + vt = c.value_type + Test.with_clue "Expecting "+vt.to_display_text+" to be Integer. " <| + vt.is_integer . should_be_true + c.to_vector . should_equal [1, 0, 1] + + Test.specify "should allow to cast an integer column to floating point" <| + t = table_builder [["X", [1, 2, 3]]] + c = t.at "X" . 
cast Value_Type.Float + c.value_type.is_floating_point . should_be_true + c.to_vector . should_equal [1.0, 2.0, 3.0] + + Test.specify "should allow to cast an integer column to a smaller bit-width and larger bit-width" pending="TODO: #5159" <| + t = table_builder [["X", [1, 2, 3]]] + c = t.at "X" . cast (Value_Type.Integer Bits.Bits_16) + c.value_type . should_equal (Value_Type.Integer Bits.Bits_16) + c.to_vector . should_equal [1, 2, 3] + + t2 = table_builder [["X", [1, 2, 12000000]]] + c2 = t2.at "X" . cast (Value_Type.Integer Bits.Bits_16) + c2.value_type . should_equal (Value_Type.Integer Bits.Bits_16) + c2.to_vector . should_equal [1, 2, Nothing] + # This can likely only be checked on in-memory. + Problems.expect_warning Conversion_Failure c2 + + r3 = t2.at "X" . cast (Value_Type.Integer Bits.Bits_16) on_problems=Problem_Behavior.Report_Error + r3.should_fail_with Conversion_Failure + + # Now converting the 16-bit column `c` into 32 bits. + c3 = c.cast (Value_Type.Integer Bits.Bits_32) + c3.value_type . should_equal (Value_Type.Integer Bits.Bits_32) + c3.to_vector . should_equal [1, 2, 3] + + Test.specify "should allow to cast a floating point column to integer" <| + t = table_builder [["X", [1.0001, 2.25, 4.0]]] + c = t.at "X" . cast Value_Type.Integer + vt = c.value_type + Test.with_clue "Expecting "+vt.to_display_text+" to be Integer. " <| + vt.is_integer . should_be_true + c.to_vector . should_equal [1, 2, 4] + # Not reporting Conversion_Failure here, as converting floats to integers is expected to truncate or round the value. + Problems.assume_no_problems c + + # The backend may either truncate or round. + t2 = table_builder [["X", [1.1, 4.9]]] + c2 = t2.at "X" . cast Value_Type.Integer + v2 = c2.to_vector + [[1, 4], [1, 5]] . 
should_contain v2 + + if setup.is_database.not then + Test.specify "should report Conversion_Failure if converting a huge float to an integer overflows it" <| + max_long = Java_Long.MAX_VALUE + too_big_double = (max_long + 1.0) * 1000.0 + (too_big_double > max_long) . should_be_true + + min_long = Java_Long.MIN_VALUE + too_small_double = (min_long - 1.0) * 1000.0 + (too_small_double < min_long) . should_be_true + + v = [1.0, 2.1, max_long, too_big_double, min_long, too_small_double, 4.0] + t = table_builder [["X", v]] + t.at "X" . to_vector . should_equal v + t.at "X" . value_type . should_equal Value_Type.Float + + c = t.at "X" . cast Value_Type.Integer + c.value_type . should_equal Value_Type.Integer + c.to_vector . should_equal [1, 2, max_long, Nothing, min_long, Nothing, 4] + warning = Problems.expect_warning Conversion_Failure c + warning.to_display_text . should_contain "2 rows could not be converted" + + if supports_dates then + Test.group prefix+"Table/Column.cast - date/time" <| + Test.specify "should allow to get the Date part from a Date_Time" <| + t = table_builder [["X", [Date_Time.new 2015 1 2 3 4 5, Date_Time.new 2023 12 31 23 56 59]]] + c = t.at "X" . cast Value_Type.Date + c.value_type . should_equal Value_Type.Date + c.to_vector . should_equal [Date.new 2015 1 2, Date.new 2023 12 31] + + Test.specify "should allow to get the Time_Of_Day part from a Date_Time" <| + t = table_builder [["X", [Date_Time.new 2015 1 2 3 4 5, Date_Time.new 2023 12 31 23 56 59]]] + c = t.at "X" . cast Value_Type.Time + c.value_type . should_equal Value_Type.Time + c.to_vector . should_equal [Time_Of_Day.new 3 4 5, Time_Of_Day.new 23 56 59] + + Test.specify "should allow to convert a Date into Date_Time" <| + day1 = Date.new 2015 1 2 + day2 = Date.new 2023 12 31 + t = table_builder [["X", [day1, day2]]] + c = t.at "X" . cast Value_Type.Date_Time + c.value_type . 
should_equal Value_Type.Date_Time + vz = c.to_vector + # We cannot rely on what timezone the backend uses, so we just ensure that the time difference between the two results is consistent. + diff = Duration.between vz.first vz.second + expected_diff = Duration.between day1.to_date_time day2.to_date_time + diff . should_equal expected_diff + + Test.group prefix+"Table/Column.cast - checking invariants" <| + Test.specify "should report an error for unsupported conversions" <| + t = table_builder [["X", [1, 2, 3]]] + r1 = t.at "X" . cast Value_Type.Boolean + r1.should_fail_with Illegal_Argument + + Test.specify "should report an error pointing to the Table.parse method where applicable" <| + t = table_builder [["X", ["1", "2", "3"]]] + r1 = t.at "X" . cast Value_Type.Integer + r1.should_fail_with Illegal_Argument + r1.to_display_text . should_contain "`parse` should be used instead" + + Test.specify "should work if the first row is NULL" <| + t = table_builder [["X", [Nothing, 1, 2, 3000]], ["Y", [Nothing, True, False, True]]] + + c1 = t.at "X" . cast Value_Type.Char + c1.value_type.is_text . should_be_true + c1.to_vector . should_equal [Nothing, "1", "2", "3000"] + + c2 = t.at "Y" . cast Value_Type.Integer + c2.value_type . should_equal Value_Type.Integer + c2.to_vector . should_equal [Nothing, 1, 0, 1] + + Test.specify "should not lose the type after further operations were performed on the result" <| + t = table_builder [["X", [1, 2, 3000]], ["Y", [True, False, True]]] + c1 = t.at "X" . cast Value_Type.Char + c2 = t.at "Y" . cast Value_Type.Integer + + c3 = c1 + '_suffix' + c3.value_type.is_text . should_be_true + c3.to_vector . should_equal ["1_suffix", "2_suffix", "3000_suffix"] + + c4 = c2 + 1000 + vt4 = c4.value_type + Test.with_clue "Expecting "+vt4.to_display_text+" to be Integer. " <| + vt4.is_integer . should_be_true + c4.to_vector . 
should_equal [1001, 1000, 1001] + + Test.specify "should not lose the type after further operations were performed on the result, even if the first row is NULL" <| + t = table_builder [["X", [Nothing, 1, 2, 3000]], ["Y", [Nothing, True, False, True]]] + c1 = t.at "X" . cast Value_Type.Char + c2 = t.at "Y" . cast Value_Type.Integer + + c3 = c1 + '_suffix' + c3.value_type.is_text . should_be_true + c3.to_vector . should_equal [Nothing, "1_suffix", "2_suffix", "3000_suffix"] + + c4 = c2 + 1000 + vt4 = c4.value_type + Test.with_clue "Expecting "+vt4.to_display_text+" to be Integer. " <| + vt4.is_integer . should_be_true + c4.to_vector . should_equal [Nothing, 1001, 1000, 1001] + + Test.specify 'Table.cast should cast the columns "in-place" and not reorder them' <| + t = table_builder [["X", [1, 2, 3000]], ["Y", [4, 5, 6]], ["Z", [7, 8, 9]], ["A", [True, False, True]]] + t2 = t.cast ["Z", "Y"] Value_Type.Char + t2.column_names . should_equal ["X", "Y", "Z", "A"] + + t2.at "X" . value_type . is_integer . should_be_true + t2.at "Y" . value_type . is_text . should_be_true + t2.at "Z" . value_type . is_text . should_be_true + t2.at "A" . value_type . is_boolean . should_be_true + + t2.at "X" . to_vector . should_equal [1, 2, 3000] + t2.at "Y" . to_vector . should_equal ["4", "5", "6"] + t2.at "Z" . to_vector . should_equal ["7", "8", "9"] + t2.at "A" . to_vector . should_equal [True, False, True] + + if setup.test_selection.fixed_length_text_columns then + Test.specify "should preserve the overridden types when materialized" pending="TODO: #5159 needed" <| + t = table_builder [["X", [1, 2, 100]], ["Y", ["a", "abcdef", "abc"]]] + t2 = t . cast "X" (Value_Type.Integer Bits.Bits_16) . cast "Y" (Value_Type.Char size=3 variable_length=False) + + t3 = materialize t2 + t3.at "X" . value_type . should_equal (t2.at "X" . value_type) + t3.at "Y" . value_type . should_equal (Value_Type.Char size=3 variable_length=False) + t3.at "Y" . to_vector . 
should_equal ["a ", "abc", "abc"] + + Test.group prefix+"Simple variant of Table/Column.parse in all backends" pending=(if setup.is_database then "parse is not yet implemented in DB") <| + Test.specify "should be able to parse simple integers" <| + t = table_builder [["X", ["42", "0", "-1"]]] + + c1 = t.at "X" . parse Value_Type.Integer + c1.value_type.is_integer . should_be_true + c1.to_vector . should_equal [42, 0, -1] + + c2 = t.parse ["X"] Value_Type.Integer . at "X" + c2.value_type.is_integer . should_be_true + c2.to_vector . should_equal [42, 0, -1] + + Test.specify "should be able to parse simple floats" <| + t = table_builder [["X", ["42.5", "0.25", "-1.0"]]] + + c1 = t.at "X" . parse Value_Type.Float + c1.value_type.is_floating_point . should_be_true + c1.to_vector . should_equal [42.5, 0.25, -1.0] + + c2 = t.parse ["X"] Value_Type.Float . at "X" + c2.value_type.is_floating_point . should_be_true + c2.to_vector . should_equal [42.5, 0.25, -1.0] + + if supports_dates then + Test.specify "should be able to parse dates using a default format" <| + t = table_builder [["X", ["2018-01-01", "2023-12-31"]]] + + c1 = t.at "X" . parse Value_Type.Date + c1.value_type.should_equal Value_Type.Date + c1.to_vector . should_equal [Date.new 2018 1 1, Date.new 2023 12 31] + + c2 = t.parse ["X"] Value_Type.Date . at "X" + c2.value_type.should_equal Value_Type.Date + c2.to_vector . should_equal [Date.new 2018 1 1, Date.new 2023 12 31] + if supports_dates.not then + Test.specify "should report that date parsing is unsupported" <| + t = table_builder [["X", ["2018-01-01", "2023-12-31"]]] + + r1 = t.at "X" . parse Value_Type.Date + r1.should_fail_with Unsupported_Database_Operation + + r2 = t.parse ["X"] Value_Type.Date + r2.should_fail_with Unsupported_Database_Operation + + Test.specify "should be able to parse booleans with default format" <| + t = table_builder [["X", ["true", "false", "true"]]] + + c1 = t.at "X" . 
parse Value_Type.Boolean + c1.value_type.should_equal Value_Type.Boolean + c1.to_vector . should_equal [True, False, True] + + c2 = t.parse ["X"] Value_Type.Boolean . at "X" + c2.value_type.should_equal Value_Type.Boolean + c2.to_vector . should_equal [True, False, True] diff --git a/test/Table_Tests/src/Common_Table_Operations/Main.enso b/test/Table_Tests/src/Common_Table_Operations/Main.enso index d6cd891ebc9d..1211f6d7e941 100644 --- a/test/Table_Tests/src/Common_Table_Operations/Main.enso +++ b/test/Table_Tests/src/Common_Table_Operations/Main.enso @@ -4,7 +4,7 @@ import project.Common_Table_Operations.Aggregate_Spec import project.Common_Table_Operations.Column_Operations_Spec import project.Common_Table_Operations.Core_Spec import project.Common_Table_Operations.Cross_Tab_Spec -import project.Common_Table_Operations.Cast_Spec +import project.Common_Table_Operations.Conversion_Spec import project.Common_Table_Operations.Date_Time_Spec import project.Common_Table_Operations.Distinct_Spec import project.Common_Table_Operations.Expression_Spec @@ -95,7 +95,7 @@ spec setup = Select_Columns_Spec.spec setup Column_Operations_Spec.spec setup Date_Time_Spec.spec setup - Cast_Spec.spec setup + Conversion_Spec.spec setup Aggregate_Spec.spec setup Filter_Spec.spec setup Missing_Values_Spec.spec setup diff --git a/test/Tests/polyglot-sources/enso-test-java-helpers/src/main/java/org/enso/table_test_helpers/ExplodingStorage.java b/test/Tests/polyglot-sources/enso-test-java-helpers/src/main/java/org/enso/table_test_helpers/ExplodingStorage.java index 35d0ff6a3230..8b6f03b20d10 100644 --- a/test/Tests/polyglot-sources/enso-test-java-helpers/src/main/java/org/enso/table_test_helpers/ExplodingStorage.java +++ b/test/Tests/polyglot-sources/enso-test-java-helpers/src/main/java/org/enso/table_test_helpers/ExplodingStorage.java @@ -1,6 +1,7 @@ package org.enso.table_test_helpers; import org.enso.table.data.column.builder.object.Builder; +import 
org.enso.table.data.column.operation.CastProblemBuilder; import org.enso.table.data.column.operation.map.MapOperationProblemBuilder; import org.enso.table.data.column.storage.Storage; import org.enso.table.data.column.storage.type.IntegerType; @@ -107,4 +108,9 @@ public Builder createDefaultBuilderOfSameType(int capacity) { public Storage slice(List ranges) { return null; } + + @Override + public Storage cast(StorageType targetType, CastProblemBuilder castProblemBuilder) { + return null; + } }