Skip to content

Commit

Permalink
Add to_xml component (#8979)
Browse files Browse the repository at this point in the history
Adds new to_xml component
  • Loading branch information
AdRiley authored Feb 7, 2024
1 parent 62cfa8a commit e3f6ff1
Show file tree
Hide file tree
Showing 4 changed files with 557 additions and 0 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -613,6 +613,7 @@
JSON parser based off Jackson.][8719]
- [Implemented `Table.replace` for the in-memory backend.][8935]
- [Allow removing rows using a Filter_Condition.][8861]
- [Added `Table.to_xml`.][8979]

[debug-shortcuts]:
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
Expand Down Expand Up @@ -882,6 +883,7 @@
[8865]: https://github.com/enso-org/enso/pull/8865
[8935]: https://github.com/enso-org/enso/pull/8935
[8861]: https://github.com/enso-org/enso/pull/8861
[8979]: https://github.com/enso-org/enso/pull/8979

#### Enso Compiler

Expand Down
62 changes: 62 additions & 0 deletions distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ polyglot java import org.enso.table.data.table.join.conditions.Equals as Java_Jo
polyglot java import org.enso.table.data.table.join.conditions.EqualsIgnoreCase as Java_Join_Equals_Ignore_Case
polyglot java import org.enso.table.data.table.join.lookup.LookupJoin
polyglot java import org.enso.table.data.table.Table as Java_Table
polyglot java import org.enso.table.data.table.TableToXml as Java_TableToXml
polyglot java import org.enso.table.error.NonUniqueLookupKey
polyglot java import org.enso.table.error.NullValuesInKeyColumns
polyglot java import org.enso.table.error.TooManyColumnsException
Expand Down Expand Up @@ -2612,6 +2613,67 @@ type Table
## Creates a text representation of the table using the CSV format.
   The text is produced by `Text.from` with a `Delimited_Format.Delimited`
   whose delimiter is a comma.
to_csv : Text
to_csv self = Text.from self (Delimited_Format.Delimited delimiter=",")

## GROUP Standard.Base.Conversions
Returns a string containing an XML representation of the table.
Arguments:
- element_columns: Columns to be used as elements in the XML.
- attribute_columns: Columns to be used as attributes in the XML.
- value_column: Column to be used as the value for the row tag in the XML.
- root_name: The name of the root tag in the XML.
- row_name: The name of the row tag in the XML.
- on_problems: Specifies how to handle warnings if they occur, reporting
them as warnings by default.

! Error Conditions

- If a column in `element_columns`, `attribute_columns` or `value_column` is not in
the input table, a `Missing_Input_Columns` is raised as an error.
- If any incoming columns are not specified in one of `element_columns`,
`attribute_columns` or `value_column`, an `Unexpected_Extra_Columns`
is reported according to the `on_problems` setting.

? Example to_xml Operation

Input Table `table`:

Title | Author | Price | Year
------------------------+---------------------+-------+------
A Tale Of Two Cities | Charles Dickens | 9.99 | 1859
The Great Gatsby | F. Scott Fitzgerald | 5.99 | 1925

Result `r = table.to_xml ["Year"] ["Author", "Price"] "Title" "Books" "Book"`:

<Books>
<Book Author="Charles Dickens" Price="9.99">
A Tale Of Two Cities
<Year>1859</Year>
</Book>
<Book Author="F. Scott Fitzgerald" Price="5.99">
The Great Gatsby
<Year>1925</Year>
</Book>
</Books>
@element_columns Widget_Helpers.make_column_name_vector_selector
@attribute_columns Widget_Helpers.make_column_name_vector_selector
@value_column Widget_Helpers.make_column_name_selector
to_xml self (element_columns : (Vector (Integer | Text | Regex) | Text | Integer | Regex) = self.column_names) (attribute_columns : (Vector (Integer | Text | Regex) | Text | Integer | Regex) = []) (value_column : Text | Integer | Nothing = Nothing) (root_name : Text = "Table") (row_name : Text = "Row") (on_problems : Problem_Behavior = Report_Warning) -> Text =
# Resolves the requested columns, records any selection problems, and
# delegates the actual XML rendering to `Java_TableToXml.to_xml`.
columns_helper = self.columns_helper
# One problem builder is shared by both column selections below; it is
# created with `error_on_missing_columns=True`.
problem_builder = Problem_Builder.new error_on_missing_columns=True
# Element columns: resolved through the column helper, then mapped to their
# underlying Java columns.
resolved_element_columns = columns_helper.select_columns_helper element_columns Case_Sensitivity.Default False problem_builder
java_element_columns = resolved_element_columns.map .java_column

# Attribute columns follow the same resolution path. Note that
# `java_attribute_column` holds the mapped collection despite its singular name.
resolved_attribute_columns = columns_helper.select_columns_helper attribute_columns Case_Sensitivity.Default False problem_builder
java_attribute_column = resolved_attribute_columns.map .java_column

# The value column is optional: when it is `Nothing` no lookup is performed
# and `Nothing` is passed on to the Java helper.
resolved_value_column = if value_column.is_nothing then Nothing else (self.at value_column)
java_value_column = if value_column.is_nothing then Nothing else resolved_value_column.java_column

# Every table column that is not among the element, attribute or value
# columns is collected by name and reported as an `Unexpected_Extra_Columns`
# warning.
unused_columns = columns_helper.internal_columns.filter (Filter_Condition.Not_In resolved_element_columns+resolved_attribute_columns+[resolved_value_column]) . map .name
if unused_columns.length > 0 then problem_builder.report_other_warning (Unexpected_Extra_Columns.Warning unused_columns)

# The collected problems are handled through `attach_problems_before`
# according to `on_problems`, wrapping the call into the Java helper.
problem_builder.attach_problems_before on_problems <|
Java_TableToXml.to_xml self.row_count java_element_columns java_attribute_column java_value_column root_name row_name

## PRIVATE
columns_helper : Table_Column_Helper
Expand Down
113 changes: 113 additions & 0 deletions std-bits/table/src/main/java/org/enso/table/data/table/TableToXml.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
package org.enso.table.data.table;

import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.xmlbeans.XmlException;
import org.apache.xmlbeans.XmlObject;
import org.apache.xmlbeans.XmlOptions;
import org.graalvm.polyglot.Context;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;

public class TableToXml {

/**
 * Serialises the given columns into an XML document and returns it as text.
 *
 * <p>A root element is created from {@code root_name}, and one child element named after
 * {@code row_name} is appended to it for every row index in {@code [0, rowCount)}. For each row
 * the value column (when present) is written first, then one child element per element column,
 * then one attribute per attribute column. Root, row and column names are all passed through
 * {@link #makeXmlTagNameLegal(String)} before they are used.
 *
 * @param rowCount number of rows to emit
 * @param element_columns columns written as child elements of each row element
 * @param attribute_columns columns written as attributes of each row element
 * @param value_Column column written as the row element's text content, or {@code null} for none
 * @param root_name requested name of the root element
 * @param row_name requested name of each row element
 * @return the document rendered as a string
 * @throws XmlException propagated from the conversion of the document to text
 * @throws ParserConfigurationException if a document builder cannot be created
 */
public static String to_xml(
    int rowCount,
    Column[] element_columns,
    Column[] attribute_columns,
    Column value_Column,
    String root_name,
    String row_name)
    throws XmlException, ParserConfigurationException {
  Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
  Element root = document.createElement(makeXmlTagNameLegal(root_name));
  document.appendChild(root);

  // Sanitise every column name once up front so the per-row loop only performs lookups. When a
  // column name occurs more than once, the first computed mapping is kept.
  Map<String, String> tagNameByColumn =
      Stream.concat(Stream.of(element_columns), Stream.of(attribute_columns))
          .collect(
              Collectors.toMap(
                  Column::getName,
                  column -> makeXmlTagNameLegal(column.getName()),
                  (first, second) -> first));

  String rowTagName = makeXmlTagNameLegal(row_name);
  Context polyglotContext = Context.getCurrent();

  int row = 0;
  while (row < rowCount) {
    Element rowElement = document.createElement(rowTagName);

    // The row's text value is written before any child element is appended.
    if (value_Column != null) {
      get_set_value(value_Column, row, rowElement);
    }

    for (Column elementColumn : element_columns) {
      String tagName = tagNameByColumn.get(elementColumn.getName());
      get_append_element(elementColumn, tagName, row, document, rowElement);
      polyglotContext.safepoint();
    }

    for (Column attributeColumn : attribute_columns) {
      String attributeName = tagNameByColumn.get(attributeColumn.getName());
      get_set_attribute(attributeColumn, attributeName, row, rowElement);
      polyglotContext.safepoint();
    }

    root.appendChild(rowElement);
    polyglotContext.safepoint();
    row++;
  }

  return convert_to_string(document);
}

/**
 * Renders a DOM document as XML text with the XMLBeans "save pretty print" option enabled.
 *
 * @param doc the document to render
 * @return the XML text produced by XMLBeans for the document
 * @throws XmlException propagated from {@code XmlObject.Factory.parse}
 */
private static String convert_to_string(Document doc) throws XmlException {
  XmlOptions prettyPrintOptions = new XmlOptions();
  prettyPrintOptions.setSavePrettyPrint();
  return XmlObject.Factory.parse(doc).xmlText(prettyPrintOptions);
}

/**
 * Copies one cell of an attribute column onto the row element as an XML attribute.
 *
 * <p>A {@code null} cell writes nothing, so the row element simply lacks that attribute.
 *
 * @param attribute_column column the cell is read from
 * @param legal_name attribute name to use
 * @param row index of the row to read
 * @param rowElement element receiving the attribute
 * @throws DOMException propagated from {@code Element.setAttribute}
 */
private static void get_set_attribute(
    Column attribute_column, String legal_name, int row, Element rowElement) throws DOMException {
  Object cell = attribute_column.getStorage().getItemBoxed(row);
  if (cell == null) {
    return;
  }
  rowElement.setAttribute(legal_name, cell.toString());
}

/**
 * Appends one cell of an element column to the row element as a child element.
 *
 * <p>The child is named {@code legal_name} and its text content is the cell's {@code toString()}
 * value. A {@code null} cell appends nothing.
 *
 * @param element_column column the cell is read from
 * @param legal_name tag name to use for the child element
 * @param row index of the row to read
 * @param doc document used to create the child element
 * @param rowElement element receiving the child
 * @throws DOMException propagated from the DOM calls
 */
private static void get_append_element(
    Column element_column, String legal_name, int row, Document doc, Element rowElement)
    throws DOMException {
  Object cell = element_column.getStorage().getItemBoxed(row);
  if (cell == null) {
    return;
  }
  Element child = doc.createElement(legal_name);
  child.setTextContent(cell.toString());
  rowElement.appendChild(child);
}

/**
 * Uses one cell of the value column as the text content of the row element.
 *
 * <p>A {@code null} cell leaves the row element untouched.
 *
 * @param value_Column column the cell is read from
 * @param row index of the row to read
 * @param rowElement element whose text content is set
 * @throws DOMException propagated from {@code Element.setTextContent}
 */
private static void get_set_value(Column value_Column, int row, Element rowElement)
    throws DOMException {
  Object cell = value_Column.getStorage().getItemBoxed(row);
  if (cell == null) {
    return;
  }
  rowElement.setTextContent(cell.toString());
}

/**
 * Converts an arbitrary string into a legal XML name, following the {@code NameStartChar} and
 * {@code NameChar} productions of XML 1.0 (Fifth Edition).
 *
 * <p>Every code point that may not appear in a name is replaced by a single underscore. If the
 * result is empty, or its first code point may not start a name (for example a digit, {@code -}
 * or {@code .}), an underscore is prepended.
 *
 * <p>The first character is examined as a whole code point, so a name that begins with a legal
 * supplementary character (U+10000 to U+EFFFF) is returned unchanged rather than being prefixed.
 *
 * @param input the requested name; must not be {@code null}
 * @return a non-empty string containing only legal name characters
 * @throws NullPointerException if {@code input} is {@code null}
 */
public static String makeXmlTagNameLegal(String input) {
  // Both patterns are negated character classes: they match one code point that is NOT allowed.
  // The trailing surrogate pairs spell the supplementary range U+10000 to U+EFFFF, which is the
  // upper bound given by the XML grammar (planes 15 and 16 are excluded).
  String nameStartChar =
      "[^:A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\uD800\uDC00-\uDB7F\uDFFF]";
  String nameChar =
      "[^-.0-9:A-Z_a-z\u00B7\u0300-\u036F\u203F-\u2040\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD\uD800\uDC00-\uDB7F\uDFFF]";

  // Replace everything that cannot appear anywhere in a name (spaces, most punctuation, ...).
  String cleaned = input.replaceAll(nameChar, "_");
  if (cleaned.isEmpty()) {
    return "_";
  }

  // Take the first full code point; cutting at one char would split a surrogate pair and make a
  // legal supplementary start character look illegal.
  String firstCharacter = cleaned.substring(0, cleaned.offsetByCodePoints(0, 1));
  if (firstCharacter.matches(nameStartChar)) {
    cleaned = "_" + cleaned;
  }
  return cleaned;
}
}
Loading

0 comments on commit e3f6ff1

Please sign in to comment.