-
Notifications
You must be signed in to change notification settings - Fork 71
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
f1dd95e
commit 80f18e6
Showing
4 changed files
with
232 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
array {string table, [array options]} Parses tabular data into an array ---- | ||
|
||
Given a string such as: | ||
|
||
<%PRE| | ||
column1 column2 column3 | ||
------- ------- ------- | ||
data a1 data a2 data a3 | ||
data b1 data b2 data b3 | ||
%> | ||
|
||
this function will parse the data into an associative array: | ||
|
||
<%CODE| | ||
array('column1': array('data a1', 'data b1'), 'column2': array('data a2', 'data b2'), 'column3': array('data a3', 'data b3')) | ||
%> | ||
|
||
The second parameter is an optional array of options that allows for more flexible input. Every option has a
default value, so only the settings you want to change need to be provided.
|
||
{| | ||
|- | ||
! scope="col" width="6%" | Setting | ||
! scope="col" width="10%" | Type | ||
! scope="col" width="6%" | Default | ||
! scope="col" width="78%" | Description | ||
|- | ||
| columns | ||
| array | ||
| null | ||
| If the string doesn't have column headings in the first line, these can be provided as an array here. Note that if you | ||
provide this parameter, columnWidth is a required parameter as well. | ||
|- | ||
| columnWidth | ||
| array | ||
| null | ||
| For data that isn't consistently formatted, you may need to provide your own values for the column widths. Normally,
these are calculated automatically based on the first and second lines, but if those don't match the data, or aren't
provided, you need to provide the widths manually. This should be an array that is either the same size as the columns
option or one element shorter, and it should contain the width of each column, optionally skipping the last.
For instance, in the example table shown above, the width should be array(16, 20) or array(16, 20, 7). If the last value
is skipped, this means "the rest of the line".
|- | ||
| tabWidth | ||
| int | ||
| 4 | ||
| Before converting the data, each tab is expanded to spaces, padding the line out to the next multiple of the tab
width. For instance, with a tab width of 4, if the line of data is <pre>"a\tb\tc"</pre> then this will be converted to
<pre>"a   b   c"</pre>, and then the column width data is used. In cases where data is separated using exclusively
spaces this setting won't matter, as the column width and data should line up in any case. However, if tabs are used,
the data may be misaligned if the tab width differs from the one assumed by whatever produced the data. If you can
control the data, it is more reliable to output data using spaces rather than tabs, or use a tab width of 4.
|- | ||
| skipEmptyLines | ||
| boolean | ||
| true | ||
| If true, empty lines are skipped entirely. If false, each blank line adds an empty string to every column in its place.
|} | ||
|
||
When using the automatic column width detection, no particular character is required as the header separator
in the second line, nor does the separator need to fill the line. It is only required that there be one or more spaces
between each column; the column width is then measured from the start of one character sequence to the start of the
next. For instance, the following table would be properly parsed as well:
|
||
<%PRE| | ||
column1 column2 column3 | ||
---------- ------- - | ||
a b c | ||
d e f | ||
%> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
// Positional arguments: the raw table text, followed by an optional array of options.
string @data = @arguments[0];
array @options = array_get(@arguments, 1, associative_array());

// Result: maps each column name to the array of cell values found under it.
array @ret = associative_array();
// Option values, each falling back to its default when the caller did not provide it.
// NOTE(review): the trailing [] presumably takes a copy of the option array, so that
// appending to @columnWidth later does not modify the caller's array - confirm.
array @columns = array_get(@options, 'columns', array())[];
array @columnWidth = array_get(@options, 'columnWidth', array())[];
int @tabWidth = array_get(@options, 'tabWidth', 4);
boolean @skipEmptyLines = array_get(@options, 'skipEmptyLines', true);

// The two-character line endings must be listed before the bare \n. Regex alternation
// takes the first alternative that matches at a position, so with \n listed first the
// \n\r alternative could never match, and a stray \r would be left at the start of the
// following line, shifting all of its columns by one character.
array @lines = reg_split('\r\n|\n\r|\n', @data);
|
||
// Returns a copy of @line in which every tab has been replaced by spaces. Each tab pads
// the output built so far up to the next multiple of @tabWidth; all other characters are
// copied through unchanged.
closure @normalizeLine = iclosure(@line, @tabWidth) {
	@expanded = "";
	@total = length(@line);
	@pos = 0;
	while(@pos < @total) {
		@char = @line[@pos];
		if(@char == '\t') {
			// The padding depends on the length of the output so far, not on the input
			// position, because earlier tabs have already been widened.
			@padding = @tabWidth - (length(@expanded) % @tabWidth);
			@expanded .= string_multiply(" ", @padding);
		} else {
			@expanded .= @char;
		}
		@pos++;
	}
	return(@expanded);
};
|
||
// Cuts @line into consecutive cells, one per entry in @columnWidths, and returns them as
// an array (untrimmed). Each width is the number of characters the column occupies; a
// width of INFINITY means "the rest of the line". The returned array always has exactly
// as many entries as @columnWidths.
closure @splitLine = iclosure(@line, @columnWidths) {
	array @sections = array();
	// Index in @line at which the current column starts.
	@lastX = 0;
	foreach(@width in @columnWidths) {
		try {
			if(@width == math_const('INFINITY') || @lastX + @width > length(@line)) {
				// Open-ended column, or the line stops partway through this column:
				// take whatever is left of the line.
				@sections[] = substr(@line, @lastX);
			} else {
				@sections[] = substr(@line, @lastX, @lastX + @width);
			}
		} catch (Exception @ex) {
			// The line ends before this column starts (substr presumably rejects the
			// out of range start index), so fill in an empty cell.
			@sections[] = "";
		}
		@lastX += @width;
	}
	return(@sections);
};
|
||
// Work out the column names (@columnNames) and the column widths (@columnWidth), either
// from the options provided by the caller or from the first two lines of the data.
if(array_size(@columns) != 0) {
	// The caller provided the column names, so every line in @lines is data.
	// Validate input args
	if(array_size(@columnWidth) < array_size(@columns)) {
		// The last width may be left out, which means "the rest of the line".
		@columnWidth[] = math_const('INFINITY');
	}
	if(array_size(@columnWidth) != array_size(@columns)) {
		throw(FormatException, "columnWidth must be the same size as, or one less than the size of the columns array.");
	}
	// The code after this if/else reads the names from @columnNames. Without this
	// assignment that variable would be unset whenever the columns option is used.
	@columnNames = @columns;
} else {
	if(array_size(@lines) < 2) {
		throw(FormatException, "The input data must have at least 2 lines, the column names, and the header separator.");
	}
	// We need to calculate the columns and columnWidth ourselves.
	// The general approach here is to simply count the columns between
	// the beginning of the line and the start of the first character after
	// a space/tab character. This determines the width, then we go back and
	// use that data to calculate the values that go in @columns based on the first line.
	@columnNames = execute(@lines[0], @tabWidth, @normalizeLine);
	@columnSeparator = execute(@lines[1], @tabWidth, @normalizeLine);
	@inSpaces = false;
	@width = 0;
	for(@i = 0, @i < length(@columnSeparator), @i++) {
		@char = @columnSeparator[@i];
		if(@inSpaces && @char != ' ') {
			// New column here. Finalize this column and reset.
			@columnWidth[] = @width;
			@width = 0;
			@inSpaces = false;
		} else if(@char == ' ') {
			@inSpaces = true;
		}
		// Counted after the check above, so the character that starts a new column
		// is included in the width of that new column.
		@width++;
	}
	// Push infinity on the end, which is the last column width
	@columnWidth[] = math_const('INFINITY');
	// Now we know the column widths, parse the column names
	@columnNames = array_map(execute(@columnNames, @columnWidth, @splitLine), closure(@item) {return(trim(@item));});

	// Remove the first two lines (the headings and the separator), leaving only data.
	array_remove(@lines, 0);
	array_remove(@lines, 0);
}
|
||
// Give every column an empty array up front, so that each column name is present in the
// result even when there are no data lines.
foreach(@name in @columnNames) {
	@ret[@name] = array();
}

// Split each remaining line into cells and append every cell, trimmed, to its column.
foreach(@rawLine in @lines) {
	if(@skipEmptyLines && trim(@rawLine) == "") {
		continue();
	}
	// Tabs are expanded first, so that the column widths line up with the text.
	@expanded = execute(@rawLine, @tabWidth, @normalizeLine);
	@cells = execute(@expanded, @columnWidth, @splitLine);
	foreach(@index: @cell in @cells) {
		@ret[@columnNames[@index]][] = trim(@cell);
	}
}

return(@ret);
|
||
// DONE?? |