Skip to content

Commit

Permalink
Add parse_text_table()
Browse files Browse the repository at this point in the history
  • Loading branch information
LadyCailin committed Feb 11, 2021
1 parent f1dd95e commit 80f18e6
Show file tree
Hide file tree
Showing 4 changed files with 232 additions and 1 deletion.
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
package com.laytonsmith.core.functions;

import com.laytonsmith.PureUtilities.Common.StreamUtils;
import com.laytonsmith.core.MSLog;
import com.laytonsmith.core.MethodScriptCompiler;
import com.laytonsmith.core.ParseTree;
import com.laytonsmith.core.Prefs;
import com.laytonsmith.core.Script;
import com.laytonsmith.core.compiler.analysis.ParamDeclaration;
import com.laytonsmith.core.compiler.analysis.Scope;
Expand All @@ -19,6 +21,7 @@
import com.laytonsmith.core.exceptions.ConfigRuntimeException;
import com.laytonsmith.core.exceptions.FunctionReturnException;
import com.laytonsmith.core.natives.interfaces.Mixed;
import java.io.File;

import java.util.HashMap;
import java.util.Map;
Expand All @@ -40,13 +43,17 @@ public final Mixed exec(Target t, Environment env, Mixed... args) throws ConfigR
ParseTree tree;
// TODO: Ultimately, this is not scalable. We need to compile and cache these scripts at Java compile time,
// not at runtime the first time a function is used. This is an easier first step though.
File debugFile = null;
if(Prefs.DebugMode()) {
debugFile = new File("/NATIVE-MSCRIPT/" + getName());
}
if(!CACHED_SCRIPTS.containsKey(this.getClass())) {
try {

String script = script();
Scope rootScope = new Scope();
rootScope.addDeclaration(new ParamDeclaration("@arguments", CArray.TYPE, Target.UNKNOWN));
tree = MethodScriptCompiler.compile(MethodScriptCompiler.lex(script, env, null, true),
tree = MethodScriptCompiler.compile(MethodScriptCompiler.lex(script, env, debugFile, true),
env, env.getEnvClasses(), new StaticAnalysis(rootScope, true))
// the root of the tree is null, so go ahead and pull it up
.getChildAt(0);
Expand Down Expand Up @@ -77,6 +84,10 @@ public final Mixed exec(Target t, Environment env, Mixed... args) throws ConfigR
} catch (FunctionReturnException ex) {
ret = ex.getReturn();
} catch (ConfigRuntimeException ex) {
if(Prefs.DebugMode()) {
MSLog.GetLogger().e(MSLog.Tags.GENERAL, "Possibly false stacktrace, could be internal error",
ex.getTarget());
}
if(gEnv.GetStackTraceManager().getCurrentStackTrace().isEmpty()) {
ex.setTarget(t);
ConfigRuntimeException.StackTraceElement ste = new ConfigRuntimeException
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -525,4 +525,50 @@ public Version since() {
}

}

@api
public static class parse_text_table extends CompositeFunction {

	/**
	 * The name this function is registered under.
	 */
	@Override
	public String getName() {
		return "parse_text_table";
	}

	/**
	 * Accepts either one or two arguments.
	 */
	@Override
	public Integer[] numArgs() {
		return new Integer[]{1, 2};
	}

	/**
	 * Delegates to {@code getBundledDocs()} rather than embedding the documentation inline.
	 */
	@Override
	public String docs() {
		return getBundledDocs();
	}

	/**
	 * Delegates to {@code getBundledCode()} rather than embedding the script source inline.
	 */
	@Override
	protected String script() {
		return getBundledCode();
	}

	/**
	 * Declares {@code CREFormatException} as the only exception type this function lists.
	 */
	@Override
	public Class<? extends CREThrowable>[] thrown() {
		return new Class[]{CREFormatException.class};
	}

	@Override
	public boolean isRestricted() {
		return false;
	}

	@Override
	public Boolean runAsync() {
		return null;
	}

	@Override
	public Version since() {
		return MSVersion.V3_3_4;
	}

}
}
70 changes: 70 additions & 0 deletions src/main/resources/functionDocs/parse_text_table
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
array {string table, [array options]} Parses tabular data into an array ----

Given a string such as:

<%PRE|
column1 column2 column3
------- ------- -------
data a1 data a2 data a3
data b1 data b2 data b3
%>

this function will parse the data into an associative array:

<%CODE|
array('column1': array('data a1', 'data b1'), 'column2': array('data a2', 'data b2'), 'column3': array('data a3', 'data b3'))
%>

The second parameter is an optional array of options, which allows for more flexible input. All of the options have
default values.

{|
|-
! scope="col" width="6%" | Setting
! scope="col" width="10%" | Type
! scope="col" width="6%" | Default
! scope="col" width="78%" | Description
|-
| columns
| array
| null
| If the string doesn't have column headings in the first line, these can be provided as an array here. Note that if you
provide this parameter, columnWidth is a required parameter as well.
|-
| columnWidth
| array
| null
| For data that isn't consistently formatted, you may need to provide your own values for the column widths. Normally,
this is calculated automatically based on the first and second lines, but if those don't match the data, or aren't
provided, you need to provide this manually. This should be an array that is either the same size as the columns option,
or one element shorter, and should contain the width of each column, optionally skipping the last.
For instance, in the example table shown above, the width should be array(16, 20) or array(16, 20, 7). If the last value
is skipped, this means "the rest of the line".
|-
| tabWidth
| int
| 4
| Before converting the data, all tabs are normalized to spaces based on the tab width of the line. For instance, if the
line of data is <pre>"a\tb\tc"</pre> then this will be converted to <pre>"a b c"</pre>, and then the column width
data is used. In cases where data is separated using exclusively spaces this setting won't matter, as the column width
and data should line up in any case. However, if tabs are used, it may misformat depending on the tab width assumptions
that the data originated from. If you can control the data, it is more reliable to output data using spaces rather than
tabs, or use a tab width of 4.
|-
| skipEmptyLines
| boolean
| true
| If true, empty lines are skipped entirely. If false, each blank line adds an empty string to every column in its place.
|}

When using the automatic column width detection, it isn't required to have any particular character used as the header separator
in the second line. Nor is it required to fill the line. It's merely required to have one or more spaces between each
column, and then the column width is measured between the start of each character sequence. For instance, the following table
would be properly parsed as well:

<%PRE|
column1 column2 column3
---------- ------- -
a b c
d e f
%>
104 changes: 104 additions & 0 deletions src/main/resources/function_impl/parse_text_table.ms
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
// Inputs: @arguments[0] is the table text, @arguments[1] (optional) is the options array.
string @data = @arguments[0];
array @options = array_get(@arguments, 1, associative_array());

// The result, keyed by column name.
array @ret = associative_array();
// Read each option, falling back to its default when the key is absent.
// NOTE(review): the trailing [] is presumably there to copy the option arrays so that
// later pushes onto @columnWidth don't modify the caller's array - confirm.
array @columns = array_get(@options, 'columns', array())[];
array @columnWidth = array_get(@options, 'columnWidth', array())[];
int @tabWidth = array_get(@options, 'tabWidth', 4);
boolean @skipEmptyLines = array_get(@options, 'skipEmptyLines', true);

// Split into lines. The two character line endings must be tried before the bare \n,
// because regex alternation takes the first alternative that matches. With \n listed first,
// the \n\r alternative could never match, and the leftover \r ended up at the start of the next
// line, shifting every column in it by one character.
array @lines = reg_split('\r\n|\n\r|\n', @data);

// Returns @line with every tab replaced by enough spaces to reach the next multiple
// of @tabWidth, measured against the output built so far. All other characters
// are copied through unchanged.
closure @normalizeLine = iclosure(@line, @tabWidth) {
	@expanded = "";
	@lineLength = length(@line);
	for(@pos = 0, @pos < @lineLength, @pos++) {
		@char = @line[@pos];
		if(@char == '\t') {
			// Pad up to the next tab stop.
			@padding = @tabWidth - (length(@expanded) % @tabWidth);
			@expanded .= string_multiply(" ", @padding);
		} else {
			@expanded .= @char;
		}
	}
	return(@expanded);
};

// Cuts @line into consecutive cells, one per entry in @columnWidths, and returns them as an array.
// A width of math_const('INFINITY'), or a width that runs past the end of the line, takes
// the rest of the line. Columns that start at or beyond the end of the line yield "".
closure @splitLine = iclosure(@line, @columnWidths) {
	array @sections = array();
	@lineLength = length(@line);
	@lastX = 0;
	foreach(@width in @columnWidths) {
		if(@lastX >= @lineLength) {
			// The line ends before this column starts, fill with an empty cell.
			// This is checked explicitly, rather than by catching every Exception from substr,
			// so that genuinely invalid widths are reported instead of silently producing "".
			@sections[] = "";
		} else if(@width == math_const('INFINITY') || @lastX + @width > @lineLength) {
			@sections[] = substr(@line, @lastX);
		} else {
			@sections[] = substr(@line, @lastX, @lastX + @width);
		}
		@lastX += @width;
	}
	return(@sections);
};

// The names used as keys in @ret, in column order. These come either from the columns
// option, or from the first line of the data.
array @columnNames = array();

if(array_size(@columns) != 0) {
	// Validate input args
	if(array_size(@columnWidth) < array_size(@columns)) {
		// The width of the last column may be omitted, meaning "the rest of the line".
		@columnWidth[] = math_const('INFINITY');
	}
	if(array_size(@columnWidth) != array_size(@columns)) {
		throw(FormatException, "columnWidth must be the same size as, or one less than the size of the columns array.");
	}
	// The caller supplied the headings, so every line in @lines is data. Previously
	// @columnNames was never set on this path, so the loops below had nothing to work with.
	@columnNames = @columns;
} else {
	if(array_size(@lines) < 2) {
		throw(FormatException, "The input data must have at least 2 lines, the column names, and the header separator.");
	}
	// We need to calculate the columns and columnWidth ourselves.
	// The general approach here is to simply count the columns between
	// the beginning of the line and the start of the first character after
	// a space/tab character. This determines the width, then we go back and
	// use that data to calculate the values that go in @columnNames based on the first line.
	// NOTE(review): widths passed in the columnWidth option without the columns option are still
	// in @columnWidth at this point, and the detected widths are appended after them - confirm
	// whether that combination is meant to be supported.
	@headerLine = execute(@lines[0], @tabWidth, @normalizeLine);
	@columnSeparator = execute(@lines[1], @tabWidth, @normalizeLine);
	@inSpaces = false;
	@width = 0;
	for(@i = 0, @i < length(@columnSeparator), @i++) {
		@char = @columnSeparator[@i];
		if(@inSpaces && @char != ' ') {
			// New column here. Finalize this column and reset.
			@columnWidth[] = @width;
			@width = 0;
			@inSpaces = false;
		} else if(@char == ' ') {
			@inSpaces = true;
		}
		@width++;
	}
	// Push infinity on the end, which is the last column width
	@columnWidth[] = math_const('INFINITY');
	// Now we know the column widths, parse the column names
	@columnNames = array_map(execute(@headerLine, @columnWidth, @splitLine), closure(@item) {
		return(trim(@item));
	});

	// remove the first two lines
	array_remove(@lines, 0);
	array_remove(@lines, 0);
}

// Every column gets an entry, even if there turn out to be no data lines.
foreach(@column in @columnNames) {
	@ret[@column] = array();
}

foreach(@line in @lines) {
	if(@skipEmptyLines && trim(@line) == "") {
		continue();
	}
	// Kept separate from @columns, which holds the caller's columns option.
	@cells = execute(execute(@line, @tabWidth, @normalizeLine), @columnWidth, @splitLine);
	for(@i = 0, @i < array_size(@cells), @i++) {
		@ret[@columnNames[@i]][] = trim(@cells[@i]);
	}
}

return(@ret);

// DONE??

0 comments on commit 80f18e6

Please sign in to comment.