Skip to content

Commit

Permalink
Excel DataLink (#9346)
Browse files Browse the repository at this point in the history
- Adds the Excel format as one of the formats supported when creating a data link.
- The data link can choose to read the file as a workbook, or read a sheet or range from it as a table, like `Excel_Format`.
- Also updated Delimited format dialog to allow customizing the quote style.
  • Loading branch information
radeusgd authored Mar 11, 2024
1 parent 1f6db1e commit e98306f
Show file tree
Hide file tree
Showing 20 changed files with 357 additions and 23 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -628,6 +628,7 @@
- [Added `Xml_Document.write`][9299]
- [Added `select_by_type` and `remove_by_type` to `Table` and `DB_Table`][9334]
- [Make File./ only accept Text][9330]
- [Implemented Excel Data Link][9346]

[debug-shortcuts]:
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
Expand Down Expand Up @@ -909,8 +910,9 @@
[9249]: https://github.com/enso-org/enso/pull/9249
[9269]: https://github.com/enso-org/enso/pull/9269
[9299]: https://github.com/enso-org/enso/pull/9299
[9344]: https://github.com/enso-org/enso/pull/9344
[9330]: https://github.com/enso-org/enso/pull/9330
[9334]: https://github.com/enso-org/enso/pull/9334
[9346]: https://github.com/enso-org/enso/pull/9346

#### Enso Compiler

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,20 @@ v.test('correctly validates example S3 .datalink files with the schema', () => {
}
})

// Each example data link that uses a tabular format (Excel or Delimited) is
// loaded from the Table data links directory and checked against the schema.
v.test('correctly validates example Table .datalink files with the schema', () => {
    const exampleFileNames = [
        'example-http-format-excel-workbook.datalink',
        'example-http-format-excel-sheet.datalink',
        'example-http-format-excel-range.datalink',
        'example-http-format-delimited-custom-quote.datalink',
        'example-http-format-delimited-ignore-quote.datalink',
    ]
    for (const fileName of exampleFileNames) {
        const filePath = path.resolve(TABLE_DATA_LINKS_ROOT, fileName)
        const dataLink = loadDataLinkFile(filePath)
        // The file name doubles as the label passed to the schema check.
        testSchema(dataLink, fileName)
    }
})

v.test('correctly validates example Database .datalink files with the schema', () => {
const schemas = ['postgres-db.datalink', 'postgres-table.datalink']
for (const schema of schemas) {
Expand Down
139 changes: 136 additions & 3 deletions app/ide-desktop/lib/dashboard/src/data/dataLinkSchema.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,13 @@
},
"required": ["type", "secretPath"]
},
"BooleanOrInfer": {
"anyOf": [
{ "title": "Infer", "const": "infer", "type": "string" },
{ "title": "True", "const": true, "type": "boolean" },
{ "title": "False", "const": false, "type": "boolean" }
]
},

"AwsAuth": {
"title": "AWS Authentication",
Expand Down Expand Up @@ -174,7 +181,8 @@
"anyOf": [
{ "$ref": "#/$defs/DefaultFormat" },
{ "$ref": "#/$defs/DelimitedFormat" },
{ "$ref": "#/$defs/JsonFormat" }
{ "$ref": "#/$defs/JsonFormat" },
{ "$ref": "#/$defs/ExcelFormat" }
]
},
"DefaultFormat": {
Expand Down Expand Up @@ -207,10 +215,17 @@
"headers": {
"title": "Headers",
"description": "Whether a header row containing column names is present.",
"type": "boolean"
"$ref": "#/$defs/BooleanOrInfer"
},
"quote_style": {
"title": "Custom Quotes",
"anyOf": [
{ "$ref": "#/$defs/DelimitedQuoteStyle" },
{ "$ref": "#/$defs/DelimitedQuoteStyleNo" }
]
}
},
"required": ["type", "subType", "delimiter"]
"required": ["type", "subType", "delimiter", "headers"]
},
"JsonFormat": {
"title": "JSON",
Expand All @@ -220,6 +235,124 @@
"subType": { "title": "Type", "const": "json", "type": "string" }
},
"required": ["type", "subType"]
},
"ExcelFormat": {
"title": "Excel",
"type": "object",
"properties": {
"type": { "title": "Type", "const": "format", "type": "string" },
"subType": { "title": "Type", "const": "excel", "type": "string" },
"section": {
"title": "Section",
"anyOf": [
{ "$ref": "#/$defs/ExcelSectionWorkbook" },
{ "$ref": "#/$defs/ExcelSectionSheet" },
{ "$ref": "#/$defs/ExcelSectionRange" }
]
}
},
"required": ["type", "subType", "section"]
},

"ExcelSectionWorkbook": {
"title": "Workbook",
"type": "object",
"properties": {
"type": { "const": "workbook", "type": "string" },
"defaultSheet": {
"title": "Default sheet",
"$comment": "I want the title to be 'Default sheet (for writing)' to make it clear without hovering, but then the label is too wide. Can we make the modal wider?",
"description": "The default sheet used when writing a table to this data link.",
"type": "string",
"minLength": 1,
"default": "EnsoSheet"
}
},
"required": ["type"]
},
"ExcelSectionSheet": {
"title": "Sheet",
"type": "object",
"properties": {
"type": { "const": "sheet", "type": "string" },
"name": {
"title": "Sheet name",
"description": "Must not be blank.",
"type": "string",
"minLength": 1
},
"headers": {
"title": "Headers",
"description": "Whether a header row containing column names is present at the top of the sheet.",
"$ref": "#/$defs/BooleanOrInfer"
}
},
"required": ["type", "name", "headers"]
},
"ExcelSectionRange": {
"title": "Range",
"type": "object",
"properties": {
"type": { "const": "range", "type": "string" },
"address": {
"title": "Address",
"description": "Must not be blank.",
"type": "string",
"minLength": 1,
"examples": ["Sheet1!A1:B2"]
},
"headers": {
"title": "Headers",
"description": "Whether a header row containing column names is present at the top of the range.",
"$ref": "#/$defs/BooleanOrInfer"
}
},
"required": ["type", "address", "headers"]
},

"DelimitedQuoteStyle": {
"title": "Custom quotes",
"type": "object",
"properties": {
"type": { "title": "Type", "const": "quote_style", "type": "string" },
"subType": {
"title": "Subtype",
"const": "with_quotes",
"type": "string"
},
"quote": {
"title": "Quote",
"description": "Must be exactly one character.",
"type": "string",
"minLength": 1,
"maxLength": 1,
"default": "\"",
"examples": ["\"", "'"]
},
"escape": {
"title": "Escape",
"description": "Must be exactly one character. The character that is prepended to the quote character to escape it.",
"type": "string",
"minLength": 1,
"maxLength": 1,
"default": "\"",
"examples": ["\"", "\\"]
}
},
"required": ["type", "subType", "quote", "escape"]
},
"DelimitedQuoteStyleNo": {
"type": "object",
"title": "Ignore quotes",
"properties": {
"type": { "title": "Type", "const": "quote_style", "type": "string" },
"subType": {
"title": "Subtype",
"const": "no_quotes",
"type": "string"
}
},
"required": ["type", "subType"]
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,13 @@ type Enso_Path
if raw_segments.is_empty then Error.throw (Illegal_Argument.Error "Invalid path - it should contain at least one segment.") else
organization_name = raw_segments.first
segments = raw_segments.drop 1 . filter s-> s.is_empty.not
if organization_name != Enso_User.current.name then Error.throw (Unimplemented.throw "Currently only resolving paths for the current user is supported.") else
if segments.is_empty then Enso_Path.Value organization_name [] Nothing else
asset_name = segments.last
Enso_Path.Value organization_name (segments.drop (Index_Sub_Range.Last 1)) asset_name
current_user_name = Enso_User.current.name
# The `if_not_error` is a workaround for https://github.com/enso-org/enso/issues/9283 and it can be removed after that is fixed.
current_user_name.if_not_error <|
if organization_name != current_user_name then Unimplemented.throw "Currently only resolving paths for the current user is supported." else
if segments.is_empty then Enso_Path.Value organization_name [] Nothing else
asset_name = segments.last
Enso_Path.Value organization_name (segments.drop (Index_Sub_Range.Last 1)) asset_name

## PRIVATE
resolve_parent self =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import project.Any.Any
import project.Data.Json.JS_Object
import project.Data.Json.Json
import project.Data.Numbers.Integer
import project.Data.Text.Case.Case
import project.Data.Text.Encoding.Encoding
import project.Data.Text.Text
import project.Data.Vector.Vector
Expand Down Expand Up @@ -245,3 +246,17 @@ JSON_Format.from (that : JS_Object) =

## A setting to infer the default behaviour of some option.
type Infer


## PRIVATE
   Parses the JSON representation of a `Boolean | Infer` setting.

   Arguments:
   - field_name: The name of the field being parsed. It is only used to build
     the error message.
   - value: The raw value read from JSON.

   A missing value (`Nothing`) yields `Infer`. Boolean values are returned
   as-is. Text values are compared case-insensitively against `infer`, `true`
   and `false`; any other text results in an `Illegal_Argument` error.
parse_boolean_with_infer (field_name : Text) (value : Boolean | Text | Nothing) -> Boolean | Infer = case value of
    Nothing -> Infer
    True -> True
    False -> False
    text : Text ->
        normalized = text.to_case Case.Lower
        case normalized of
            "infer" -> Infer
            "true" -> True
            "false" -> False
            _ ->
                message = "The field `"+field_name+"` must be a boolean or the string `infer`."
                Error.throw (Illegal_Argument.Error message)
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import Standard.Base.System.File_Format_Metadata.File_Format_Metadata
import Standard.Base.System.Input_Stream.Input_Stream
from Standard.Base.Metadata.Choice import Option
from Standard.Base.Widget_Helpers import make_file_read_delimiter_selector
from Standard.Base.System.File_Format import parse_boolean_with_infer

import project.Data.Data_Formatter.Data_Formatter
import project.Data.Match_Columns.Match_Columns
Expand Down Expand Up @@ -169,14 +170,17 @@ Delimited_Format.from (that : JS_Object) =
encoding = encoding_name
. if_not_nothing (Encoding.from_name encoding_name)
. if_nothing Encoding.utf_8
headers = that.get "headers" . if_nothing Infer
headers = that.get "headers" |> parse_boolean_with_infer "headers"
skip_rows = that.get "skip_rows" . if_nothing 0
row_limit = that.get "row_limit"
keep_invalid_rows = that.get "keep_invalid_rows" . if_nothing True
quote_style = case that.get "quote_style" of
Nothing -> Quote_Style.With_Quotes
json -> Quote_Style.from json

unsupported_fields = ["quote_style", "value_formatter", "line_endings", "comment_character"]
unsupported_fields = ["value_formatter", "line_endings", "comment_character"]
case unsupported_fields.find that.contains_key if_missing=Nothing of
Nothing ->
Delimited_Format.Delimited delimiter=delimiter encoding=encoding headers=headers skip_rows=skip_rows row_limit=row_limit keep_invalid_rows=keep_invalid_rows
Delimited_Format.Delimited delimiter=delimiter encoding=encoding headers=headers skip_rows=skip_rows row_limit=row_limit quote_style=quote_style keep_invalid_rows=keep_invalid_rows
field ->
Error.throw (Illegal_Argument.Error ("The field `" ++ field ++ "` is currently not supported when deserializing the Delimited format from JSON."))
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from Standard.Base import all
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument

type Quote_Style
## Does not handle quotes at all.
Expand Down Expand Up @@ -30,4 +31,16 @@ type Quote_Style
The quote and escape characters must consist of exactly one code-point
(i.e. it can be only one character and complex characters like emojis
may not be used).
With_Quotes (always_quote : Boolean = False) (quote : Text = '"') (quote_escape : Text = '"')
With_Quotes (always_quote : Boolean = False) (quote : Text = '"') (quote_escape : Text = quote)

## PRIVATE
   Builds a `Quote_Style` from its JSON representation.

   The `subType` field is required and is compared case-insensitively:
   - `no_quotes` yields `Quote_Style.No_Quotes`.
   - `with_quotes` yields `Quote_Style.With_Quotes`, reading the optional
     fields `alwaysQuote` (defaults to `False`), `quote` (defaults to `"`) and
     `escape` (defaults to the quote character).

   A missing or unrecognized `subType` results in an `Illegal_Argument` error.
Quote_Style.from (that : JS_Object) =
    sub_type = that.get "subType" if_missing=(Error.throw (Illegal_Argument.Error "Missing `subType` field in quote style."))
    normalized = sub_type.to_case Case.Lower
    case normalized of
        "with_quotes" ->
            quote_char = that.get "quote" if_missing='"'
            # When no escape character is given, the quote character escapes itself.
            escape_char = that.get "escape" if_missing=quote_char
            force_quoting = that.get "alwaysQuote" if_missing=False
            Quote_Style.With_Quotes always_quote=force_quoting quote=quote_char quote_escape=escape_char
        "no_quotes" -> Quote_Style.No_Quotes
        _ -> Error.throw (Illegal_Argument.Error ("Unknown quote style: " + sub_type))
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import Standard.Base.System.File_Format_Metadata.File_Format_Metadata
import Standard.Base.System.Input_Stream.Input_Stream
from Standard.Base.Metadata.Choice import Option
from Standard.Base.Metadata.Widget import Text_Input, Numeric_Input
from Standard.Base.System.File_Format import parse_boolean_with_infer

import project.Data.Match_Columns.Match_Columns
import project.Data.Table.Table
Expand Down Expand Up @@ -189,3 +190,22 @@ as_section (format : Excel_Format) -> Excel_Section = case format of
Excel_Section.Worksheet sheet headers skip_rows row_limit
Excel_Format.Range address headers skip_rows row_limit _ ->
Excel_Section.Cell_Range address headers skip_rows row_limit

## PRIVATE
   Constructs an `Excel_Format` instance from its JSON representation.
   Only a subset of options is currently supported.

   Arguments:
   - that: The JSON object describing the format. It must contain a `section`
     object whose `type` field is one of `workbook`, `sheet` or `range`
     (compared case-insensitively).

   Section fields:
   - `workbook`: optional `defaultSheet` (defaults to `EnsoSheet`).
   - `sheet`: required `name`, optional `headers`.
   - `range`: required `address`, optional `headers`.
   The `headers` field is parsed by `parse_boolean_with_infer`.

   A missing required field or an unrecognized section type results in an
   `Illegal_Argument` error.
Excel_Format.from (that : JS_Object) =
    section = that.get "section" if_missing=(Error.throw (Illegal_Argument.Error "The `section` field is required."))
    case section.get "type" if_missing=(Error.throw (Illegal_Argument.Error "The `section.type` field is required.")) . to_case Case.Lower of
        "workbook" ->
            # The data link schema names this field `defaultSheet`. The
            # `default_sheet` spelling is still accepted as a fallback so that
            # any data links written with the old key keep working.
            default_sheet = section.get "defaultSheet" if_missing=(section.get "default_sheet" if_missing="EnsoSheet")
            Excel_Format.Workbook default_sheet=default_sheet
        "sheet" ->
            name = section.get "name" if_missing=(Error.throw (Illegal_Argument.Error "The `name` field is required."))
            headers = section.get "headers" |> parse_boolean_with_infer "headers"
            Excel_Format.Sheet sheet=name headers=headers
        "range" ->
            address = section.get "address" if_missing=(Error.throw (Illegal_Argument.Error "The `address` field is required."))
            headers = section.get "headers" |> parse_boolean_with_infer "headers"
            Excel_Format.Range address=address headers=headers
        unknown -> Error.throw (Illegal_Argument.Error ("The `section.type` was "+unknown+" but it must be one of `workbook`, `sheet`, or `range`."))
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ public static Value findFormatForDataLinkSubType(String subType) {

var providers =
loader.stream()
.filter(provider -> subType.equals(provider.get().getDataLinkFormatName()))
.filter(provider -> subType.equalsIgnoreCase(provider.get().getDataLinkFormatName()))
.toList();
if (providers.isEmpty()) {
return null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,9 @@ protected String getModuleName() {
protected String getTypeName() {
return "Excel_Format";
}

/**
 * Returns the name under which this format is referenced from a data link.
 *
 * <p>The value is compared, ignoring case, against the {@code subType} requested in
 * {@code findFormatForDataLinkSubType}; the data link schema uses the constant
 * {@code "excel"} as the {@code subType} of the Excel format.
 *
 * @return the data link format name, {@code "excel"}
 */
@Override
protected String getDataLinkFormatName() {
return "excel";
}
}
2 changes: 1 addition & 1 deletion test/AWS_Tests/data/format-delimited.datalink
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,6 @@
"type": "format",
"subType": "delimited",
"delimiter": " ",
"headers": false
"headers": "infer"
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"type": "HTTP",
"libraryName": "Standard.Base",
"method": "GET",
"uri": "http://http-test-helper.local/testfiles/table.tsv",
"format": {
"type": "format",
"subType": "delimited",
"delimiter": "\t",
"headers": true,
"quote_style": {
"type": "quote_style",
"subType": "with_quotes",
"quote": "'",
"escape": "\\"
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
"type": "HTTP",
"libraryName": "Standard.Base",
"method": "GET",
"uri": "http://http-test-helper.local/testfiles/js.txt",
"format": {
"type": "format",
"subType": "delimited",
"delimiter": " ",
"headers": false,
"quote_style": {
"type": "quote_style",
"subType": "no_quotes"
}
}
}
Loading

0 comments on commit e98306f

Please sign in to comment.