diff --git a/CHANGELOG.md b/CHANGELOG.md index 12c504419ccb..174cfd06ee3c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -613,6 +613,7 @@ JSON parser based off Jackson.][8719] - [Implemented `Table.replace` for the in-memory backend.][8935] - [Allow removing rows using a Filter_Condition.][8861] +- [Added `Table.to_xml`.][8979] [debug-shortcuts]: https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug @@ -882,6 +883,7 @@ [8865]: https://github.com/enso-org/enso/pull/8865 [8935]: https://github.com/enso-org/enso/pull/8935 [8861]: https://github.com/enso-org/enso/pull/8861 +[8979]: https://github.com/enso-org/enso/pull/8979 #### Enso Compiler diff --git a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso index d9ce8ddf4406..3270e7fc7594 100644 --- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso +++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso @@ -74,6 +74,7 @@ polyglot java import org.enso.table.data.table.join.conditions.Equals as Java_Jo polyglot java import org.enso.table.data.table.join.conditions.EqualsIgnoreCase as Java_Join_Equals_Ignore_Case polyglot java import org.enso.table.data.table.join.lookup.LookupJoin polyglot java import org.enso.table.data.table.Table as Java_Table +polyglot java import org.enso.table.data.table.TableToXml as Java_TableToXml polyglot java import org.enso.table.error.NonUniqueLookupKey polyglot java import org.enso.table.error.NullValuesInKeyColumns polyglot java import org.enso.table.error.TooManyColumnsException @@ -2612,6 +2613,67 @@ type Table ## Creates a text representation of the table using the CSV format. to_csv : Text to_csv self = Text.from self (Delimited_Format.Delimited delimiter=",") + + ## GROUP Standard.Base.Conversions + Returns a string containing an XML representation of the table. + Arguments: + - element_columns: Columns to be used as elements in the XML. 
+ - attribute_columns: Columns to be used as attributes in the XML. + - value_column: Column to be used as the value for the row tag in the XML. + - root_name: The name of the root tag in the XML. + - row_name: The name of the row tag in the XML. + - on_problems: Specifies how to handle warnings if they occur, reporting + them as warnings by default. + + ! Error Conditions + + - If a column in `element_columns`, `attribute_columns` or `value_column` is not in + the input table, a `Missing_Input_Columns` is raised as an error. + - If any incoming columns are not specified in one of `element_columns`, + `attribute_columns` or `value_column`, an `Unexpected_Extra_Columns` + is reported according to the `on_problems` setting. + + ? Example to_xml Operation + + Input Table `table`: + + Title | Author | Price | Year + ------------------------+---------------------+-------+------ + A Tale Of Two Cities | Charles Dickens | 9.99 | 1859 + The Great Gatsby | F. Scott Fitzgerald | 5.99 | 1925 + + Result `r = table.to_xml ["Year"] ["Author", "Price"] "Title" "Books" "Book"`: + <Books> + <Book Author="Charles Dickens" Price="9.99"> + A Tale Of Two Cities + <Year>1859</Year> + </Book> + <Book Author="F. Scott Fitzgerald" Price="5.99"> + The Great Gatsby + <Year>1925</Year> + </Book> + </Books> + @element_columns Widget_Helpers.make_column_name_vector_selector + @attribute_columns Widget_Helpers.make_column_name_vector_selector + @value_column Widget_Helpers.make_column_name_selector + to_xml self (element_columns : (Vector (Integer | Text | Regex) | Text | Integer | Regex) = self.column_names) (attribute_columns : (Vector (Integer | Text | Regex) | Text | Integer | Regex) = []) (value_column : Text | Integer | Nothing = Nothing) (root_name : Text = "Table") (row_name : Text = "Row") (on_problems : Problem_Behavior = Report_Warning) -> Text = + columns_helper = self.columns_helper + problem_builder = Problem_Builder.new error_on_missing_columns=True + resolved_element_columns = columns_helper.select_columns_helper element_columns Case_Sensitivity.Default False problem_builder + java_element_columns = resolved_element_columns.map
package org.enso.table.data.table;

import java.util.Map;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.xmlbeans.XmlException;
import org.apache.xmlbeans.XmlObject;
import org.apache.xmlbeans.XmlOptions;
import org.graalvm.polyglot.Context;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;

/**
 * Renders table columns as a pretty-printed XML document.
 *
 * <p>Every row becomes one row element under a single root element. Depending on how a column is
 * passed in, its cell is written as a child element, as an attribute of the row element, or as the
 * row element's text. Cells for which the storage returns {@code null} are omitted.
 */
public class TableToXml {

  /** Characters this class accepts as the first character of an XML name (regex class body). */
  private static final String NAME_START_CHARS =
      ":A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\ud800\udc00-\udbff\udfff";

  /** Matches one code point that may not start an XML name. */
  private static final Pattern INVALID_NAME_START = Pattern.compile("[^" + NAME_START_CHARS + "]");

  /** Matches one code point that may not appear anywhere in an XML name. */
  private static final Pattern INVALID_NAME_CHAR =
      Pattern.compile("[^-.0-9\u00B7\u0300-\u036F\u203F-\u2040" + NAME_START_CHARS + "]");

  /**
   * Builds the XML text for the given columns.
   *
   * @param rowCount number of rows to emit; each column is read at indices {@code 0..rowCount-1}
   * @param element_columns columns written as child elements of each row element
   * @param attribute_columns columns written as attributes of each row element
   * @param value_Column column written as the text content of each row element, or {@code null}
   *     for none
   * @param root_name name of the root element; sanitized with {@link #makeXmlTagNameLegal}
   * @param row_name name of every row element; sanitized with {@link #makeXmlTagNameLegal}
   * @return the pretty-printed XML document
   * @throws XmlException if the DOM document cannot be converted to text
   * @throws ParserConfigurationException if no DOM document builder can be created
   */
  public static String to_xml(
      int rowCount,
      Column[] element_columns,
      Column[] attribute_columns,
      Column value_Column,
      String root_name,
      String row_name)
      throws XmlException, ParserConfigurationException {
    Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
    Element rootElement = doc.createElement(makeXmlTagNameLegal(root_name));
    doc.appendChild(rootElement);

    // Sanitize each column name once up front instead of once per row. A column may be listed
    // both as an element and as an attribute, hence the keep-first merge function.
    Map<String, String> legalColumnNames =
        Stream.concat(Stream.of(element_columns), Stream.of(attribute_columns))
            .collect(
                Collectors.toMap(
                    Column::getName,
                    column -> makeXmlTagNameLegal(column.getName()),
                    (first, second) -> first));

    String legalRowName = makeXmlTagNameLegal(row_name);
    Context context = Context.getCurrent();
    for (int row = 0; row < rowCount; row++) {
      Element rowElement = doc.createElement(legalRowName);
      // The text value is set first so that it precedes the child elements in the output.
      if (value_Column != null) {
        setValue(value_Column, row, rowElement);
      }
      for (Column elementColumn : element_columns) {
        appendElement(
            elementColumn, legalColumnNames.get(elementColumn.getName()), row, doc, rowElement);
        // Poll the polyglot context so long-running conversions can be interrupted.
        context.safepoint();
      }
      for (Column attributeColumn : attribute_columns) {
        setAttribute(
            attributeColumn, legalColumnNames.get(attributeColumn.getName()), row, rowElement);
        context.safepoint();
      }
      rootElement.appendChild(rowElement);
      context.safepoint();
    }

    return convertToString(doc);
  }

  /**
   * Serializes the DOM document to pretty-printed XML text.
   *
   * @param doc the document to serialize
   * @return the XML text
   * @throws XmlException if the document cannot be loaded for serialization
   */
  private static String convertToString(Document doc) throws XmlException {
    XmlOptions options = new XmlOptions();
    options.setSavePrettyPrint();
    return XmlObject.Factory.parse(doc).xmlText(options);
  }

  /**
   * Sets the cell at {@code row} as an attribute on the row element; a {@code null} cell sets
   * nothing.
   */
  private static void setAttribute(
      Column attributeColumn, String legalName, int row, Element rowElement) throws DOMException {
    Object item = attributeColumn.getStorage().getItemBoxed(row);
    if (item != null) {
      rowElement.setAttribute(legalName, item.toString());
    }
  }

  /**
   * Appends the cell at {@code row} as a child element of the row element; a {@code null} cell
   * appends nothing.
   */
  private static void appendElement(
      Column elementColumn, String legalName, int row, Document doc, Element rowElement)
      throws DOMException {
    Object item = elementColumn.getStorage().getItemBoxed(row);
    if (item != null) {
      Element columnElement = doc.createElement(legalName);
      columnElement.setTextContent(item.toString());
      rowElement.appendChild(columnElement);
    }
  }

  /**
   * Sets the cell at {@code row} as the text content of the row element; a {@code null} cell
   * leaves the element untouched.
   */
  private static void setValue(Column valueColumn, int row, Element rowElement)
      throws DOMException {
    Object item = valueColumn.getStorage().getItemBoxed(row);
    if (item != null) {
      rowElement.setTextContent(item.toString());
    }
  }

  /**
   * Turns an arbitrary string into a usable XML tag or attribute name.
   *
   * <p>Every character that is not allowed inside a name is replaced by an underscore. If the
   * result is empty, or its first character is allowed inside a name but not at its start (a
   * digit, {@code -} or {@code .} for example), an underscore is prepended.
   *
   * <p>The first character is examined as a whole code point, so a name that begins with an
   * accepted supplementary-plane character is not prefixed needlessly.
   *
   * @param input the requested name; must not be {@code null}
   * @return the sanitized, never empty, name
   */
  public static String makeXmlTagNameLegal(String input) {
    String cleaned = INVALID_NAME_CHAR.matcher(input).replaceAll("_");
    // lookingAt() anchors at index 0 and consumes one code point, not one UTF-16 unit.
    if (cleaned.isEmpty() || INVALID_NAME_START.matcher(cleaned).lookingAt()) {
      cleaned = "_" + cleaned;
    }
    return cleaned;
  }
}
"(\r\n|\n)\s*") "" + +type Data + # | Title | Author | Price | Year | Last Borrowed + #---+------------------------+---------------------+-------+------+------------------------------------------ + # 0 | A Tale Of Two Cities | Charles Dickens | 9.99 | 1859 | 1999-01-02 03:40:00Z[Europe/London] + # 1 | The Great Gatsby | F. Scott Fitzgerald | 5.99 | 1925 | 2000-02-03 04:50:00Z[Europe/London] + # 2 | The Catcher In The Rye | J. D. Salinger | 6.99 | 1951 | 2001-03-04 05:25:00Z[Europe/London] + # 3 | The Grapes Of Wrath | John Steinbeck | 7.99 | 1939 | 2002-04-05 06:15:00+01:00[Europe/London] + Value ~table + + setup = + make_table = + title = ["Title", ["A Tale Of Two Cities", "The Great Gatsby", "The Catcher In The Rye", "The Grapes Of Wrath"]] + author = ["Author", ["Charles Dickens", "F. Scott Fitzgerald", "J. D. Salinger", "John Steinbeck"]] + price = ["Price", [9.99, 5.99, 6.99, 7.99]] + year = ["Year", [1859, 1925, 1951, 1939]] + last_borrowed = ["Last Borrowed", [Date_Time.new 1999 1 2 3 40, Date_Time.new 2000 2 3 4 50, Date_Time.new 2001 3 4 5 25, Date_Time.new 2002 4 5 6 15]] + + Table.new [title, author, price, year, last_borrowed] + Data.Value make_table + +add_specs suite_builder = + suite_builder.group "to_xml" group_builder-> + data = Data.setup + group_builder.specify "table with no rows should become empty Table element" <| + t = data.table . take 0 + r = t.to_xml + e = "" + r.should_equal e + group_builder.specify "if no columns are specified all fields become child elements" <| + t = data.table . select_columns ['Title', 'Year'] + r = t.to_xml + e = ''' +
+ + A Tale Of Two Cities + 1859 + + + The Great Gatsby + 1925 + + + The Catcher In The Rye + 1951 + + + The Grapes Of Wrath + 1939 + +
+ (depretty r).should_equal (depretty e) + group_builder.specify "Single column as element" <| + t = data.table . take 4 + r = t.to_xml ["Year"] + e = ''' + + + 1859 + + + 1925 + + + 1951 + + + 1939 + +
+ (depretty r).should_equal (depretty e) + group_builder.specify "Single column as attribute" <| + t = data.table . take 4 + r = t.to_xml [] ["Year"] + e = ''' + + + + + +
+ (depretty r).should_equal (depretty e) + group_builder.specify "Single column as value" <| + t = data.table . take 4 + r = t.to_xml [] [] "Year" + e = ''' + + 1859 + 1925 + 1951 + 1939 +
+ (depretty r).should_equal (depretty e) + group_builder.specify "Different fields can be used as attributes, elements and value" <| + t = data.table . take 1 + r = t.to_xml ["Year"] ["Author", "Price"] "Title" + e = ''' + + + A Tale Of Two Cities + 1859 + +
+ (depretty r).should_equal (depretty e) + group_builder.specify "Can use indexes to specify columns" <| + t = data.table . take 1 + r = t.to_xml [3] [1, 2] 0 + e = ''' + + + A Tale Of Two Cities + 1859 + +
+ (depretty r).should_equal (depretty e) + group_builder.specify "Can use regex to specify columns for element and attribute" <| + t = data.table . take 1 + r = t.to_xml (Regex.compile ".*e") (Regex.compile ".*a.*") "Author" + e = ''' + + + Charles Dickens + A Tale Of Two Cities + 9.99 + +
+ (depretty r).should_equal (depretty e) + group_builder.specify "The same fields can be used as attributes, elements and value" <| + t = data.table . take 1 + r = t.to_xml ["Author", "Price"] ["Author", "Price"] "Author" + e = ''' + + + Charles Dickens + Charles Dickens + 9.99 + +
+ (depretty r).should_equal (depretty e) + group_builder.specify "If a field is specified duplicate times as an attribute or element it is only included once" <| + t = data.table . take 1 + r = t.to_xml ["Author", "Price", "Author"] ["Author", "Price", "Price"] "Author" + e = ''' + + + Charles Dickens + Charles Dickens + 9.99 + +
+ (depretty r).should_equal (depretty e) + group_builder.specify "Root and Row tags can be set" <| + t = data.table . take 1 + r = t.to_xml ["Year"] ["Author", "Price"] "Title" "library" "book" + e = ''' + + + A Tale Of Two Cities + 1859 + + + (depretty r).should_equal (depretty e) + group_builder.specify "Will warn if not all incoming columns specified" <| + t = data.table . take 1 + r = t.to_xml ["Year"] ["Author", "Price"] "Title" + e = ''' + + + A Tale Of Two Cities + 1859 + +
+ (depretty r).should_equal (depretty e) + Problems.expect_warning (Unexpected_Extra_Columns.Warning ["Last Borrowed"]) r + group_builder.specify "Will not warn if not all incoming columns specified, but warnings are ignored" <| + t = data.table . take 1 + r = t.to_xml ["Year"] ["Author", "Price"] "Title" on_problems=Problem_Behavior.Ignore + e = ''' + + + A Tale Of Two Cities + 1859 + +
+ (depretty r).should_equal (depretty e) + Problems.assume_no_problems r + group_builder.specify "Will error if not all incoming columns specified and on_problems set to Report_Error" <| + t = data.table + r = t.to_xml ["Year"] ["Author", "Price"] "Title" on_problems=Problem_Behavior.Report_Error + r.should_fail_with (Unexpected_Extra_Columns.Warning ["Last Borrowed"]) + group_builder.specify "Will error if configured with a element column that is not in the table" <| + t = data.table + r = t.to_xml ["Author", "Price", "Not In Table"] ["Year"] "Title" + r.should_fail_with (Missing_Input_Columns.Error ["Not In Table"]) + group_builder.specify "Will error if configured with a attribute column that is not in the table" <| + t = data.table + r = t.to_xml ["Author", "Price"] ["Year", "Not In Table"] "Title" + r.should_fail_with (Missing_Input_Columns.Error ["Not In Table"]) + group_builder.specify "Will error if configured with a value column that is not in the table" <| + t = data.table + r = t.to_xml ["Author", "Price"] ["Year"] "Not In Table" + r.should_fail_with (No_Such_Column.Error "Not In Table") + group_builder.specify "XML should be pretty printed" <| + # pretty printed xml actually makes the tests less pretty + # so all the other tests depretty the xml before comparing + # this one makes sure it actually is pretty printed for users + t = data.table . take 2 + r = t.to_xml ["Year"] ["Author", "Price"] "Title" + e = '\r\n' + + ' \r\n' + + ' A Tale Of Two Cities\r\n' + + ' 1859\r\n' + + ' \r\n' + + ' \r\n' + + ' The Great Gatsby\r\n' + + ' 1925\r\n' + + ' \r\n' + + '
' + r . should_equal e + group_builder.specify "Illegal xml names are cleaned up in root_name and row_name" <| + t = data.table . take 1 + r = t.to_xml ["Year"] ["Author", "Price"] "Title" "the library" "22book" + e = ''' + + <_22book Author="Charles Dickens" Price="9.99"> + A Tale Of Two Cities + 1859 + + + (depretty r).should_equal (depretty e) + group_builder.specify "Illegal xml names are cleaned up" <| + numeric_column_name = ["1", [10]] + illegal_column_name = ["Bad & symbols", [1]] + last_borrowed = ["Last Borrowed", [Date_Time.new 1999 1 2 3 40]] + # | 1 | Bad & symbols | Last Borrowed + #---+----+-----------------------+------------------------------------- + # 0 | 10 | 1 | 1999-01-02 03:40:00Z[Europe/London] + t = Table.new [numeric_column_name, illegal_column_name, last_borrowed] + r = t.to_xml ["Bad & symbols", "Last Borrowed", "1"] ["Bad & symbols", "Last Borrowed", "1"] + e = ''' + + + <_1>10 + 1 + 1999-01-02T03:40Z[Europe/London] + +
+ (depretty r).should_equal (depretty e) + group_builder.specify "Empty strings are empty attributes/elements. Nothing values omit the entire attribute/element" <| + desc_column = ["desc", ["Col1 and Col2 both have values", "Col1 has value, Col2 Nothing", "Col1 empty string, Col2 has value", "Col1 and Col2 both blank string", "Col1 and Col2 both Nothing" ]] + col1 = ["Col1", ["Value1", "Value2", "", "", Nothing]] + col2 = ["Col2", ["Value1", Nothing, "Value2", "", Nothing]] + # | desc | Col1 | Col2 + #---+-----------------------------------+---------+--------- + # 0 | Col1 and Col2 both have values | Value1 | Value1 + # 1 | Col1 has value, Col2 Nothing | Value2 | Nothing + # 2 | Col1 empty string, Col2 has value | | Value2 + # 3 | Col1 and Col2 both blank string | | + # 4 | Col1 and Col2 both Nothing | Nothing | Nothing + t = Table.new [desc_column, col1, col2] + r = t.to_xml ["Col1", "Col2"] ["desc", "Col1", "Col2"] "Col1" + e = ''' + + + Value1 + Value1 + Value1 + + + Value2 + Value2 + + + + Value2 + + + + + + +
+ (depretty r).should_equal (depretty e) + group_builder.specify "Panic if wrong types passed in element_columns" <| + t = data.table + r = Panic.recover Any (t.to_xml 1.23) + r.to_text.should_equal "(Error: (Type_Error.Error Vector | Text | Integer | Regex 1.23 '`element_columns`'))" + group_builder.specify "Panic if wrong types passed in element_columns vector" pending='Not working' <| + t = data.table + r = Panic.recover Any (t.to_xml [1.23]) + r.to_text.should_equal "(Error: (Type_Error.Error Vector | Text | Integer | Regex 1.23 '`element_columns`'))" + group_builder.specify "Panic if wrong types passed in attribute_columns" <| + t = data.table + r = Panic.recover Any (t.to_xml [] 1.23) + r.to_text.should_equal "(Error: (Type_Error.Error Vector | Text | Integer | Regex 1.23 '`attribute_columns`'))" + group_builder.specify "Panic if wrong types passed in attribute_columns vector" pending='Not working' <| + t = data.table + r = Panic.recover Any (t.to_xml [] [1.23]) + r.to_text.should_equal "(Error: (Type_Error.Error Vector | Text | Integer | Regex 1.23 '`attribute_columns`'))" + group_builder.specify "Panic if wrong types passed in value_column" <| + t = data.table + r = Panic.recover Any (t.to_xml [] [] 1.23) + r.to_text.should_equal "(Error: (Type_Error.Error Text | Integer | Nothing 1.23 '`value_column`'))" + group_builder.specify "Panic if wrong types passed in root_name" <| + t = data.table + r = Panic.recover Any (t.to_xml [] [] "Year" 1.23) + r.to_text.should_equal "(Error: (Type_Error.Error Text 1.23 '`root_name`'))" + group_builder.specify "Panic if wrong types passed in row_name" <| + t = data.table + r = Panic.recover Any (t.to_xml [] [] "Year" "Table" 1.23) + r.to_text.should_equal "(Error: (Type_Error.Error Text 1.23 '`row_name`'))" + group_builder.specify "Panic if wrong types passed in on_problems" <| + t = data.table + r = Panic.recover Any (t.to_xml [] [] "Year" "Table" "row" 1.23) + r.to_text.should_equal "(Error: (Type_Error.Error 
Problem_Behavior 1.23 '`on_problems`'))" + group_builder.specify "works with unicode characters" <| + unicode_column = ["unicode", ['\u00A9', "👩‍🔬"]] + # | unicode + #---+----------- + # 0 | '\u00A9' + # 1 | 👩‍🔬 + t = Table.new [unicode_column] + r = t.to_xml ["unicode"] ["unicode"] "unicode" + e = ''' + + + \u00A9 + \u00A9 + + + 👩‍🔬 + 👩‍🔬 + +
+ (depretty r).should_equal (depretty e) + group_builder.specify "xml or special characters get escaped" <| + xml_column = ["xml", ["", "12", '']] + special_chars_column = ["special", ["<", "&>", "'"]] + # | xml | special + #---+----------------------+--------- + # 0 | | < + # 1 | 12 | &> + # 2 | | ' + t = Table.new [xml_column, special_chars_column] + r = t.to_xml ["xml", "special"] ["xml", "special"] "xml" + e = ''' + + + </Table> + </Table> + < + + + <tag>12</tag> + <tag>12</tag> + &> + + + <r><c v="1"></c></r> + <r><c v="1"></c></r> + ' + +
+ (depretty r).should_equal (depretty e) + +main = + suite = Test.build suite_builder-> + add_specs suite_builder + suite.run_with_filter +