From 7a90a011f72f1eed5b26ee4406ca9b4736790966 Mon Sep 17 00:00:00 2001 From: odino Date: Sun, 15 Sep 2019 11:22:27 +0400 Subject: [PATCH] array.tsv(), closes #279 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Arrays can now easily be formatted as TSV / CSV content: ``` array.tsv() array.tsv(",") ``` Here are some samples: ``` ⧐ [["LeBron", "James"], ["James", "Harden"]].tsv() LeBron James James Harden ⧐ [{"name": "Lebron", "last": "James", "jersey": 23}, {"name": "James", "last": "Harden"}].tsv("\t", ["name", "last", "jersey", "additional_key"]) name last jersey additional_key Lebron James 23 null James Harden null null ``` There is some complexity in this function due to the fact that we have to support custom headers, as well as the fact that we can format both arrays of arrays and arrays of hashes. --- docs/types/array.md | 41 +++++++++++ evaluator/evaluator_test.go | 8 +++ evaluator/functions.go | 139 ++++++++++++++++++++++++++++++++++++ object/object.go | 3 + util/util.go | 14 ++++ 5 files changed, 205 insertions(+) diff --git a/docs/types/array.md b/docs/types/array.md index c5aef90d..418c6206 100644 --- a/docs/types/array.md +++ b/docs/types/array.md @@ -298,6 +298,47 @@ Sums the elements of the array. Only supported on arrays of numbers: [1, 1, 1].sum() # 3 ``` +### tsv([separator], [header]) + +Formats the array into TSV: + +``` bash +[["LeBron", "James"], ["James", "Harden"]].tsv() +LeBron James +James Harden +``` + +You can also specify the separator to be used if you +prefer not to use tabs: + +``` bash +[["LeBron", "James"], ["James", "Harden"]].tsv(",") +LeBron,James +James,Harden +``` + +The input array needs to be an array of arrays or hashes. 
If +you use hashes, their keys will be used as the header of the TSV: + +```bash +[{"name": "Lebron", "last": "James", "jersey": 23}, {"name": "James", "last": "Harden"}].tsv() +jersey last name +23 James Lebron +null Harden James +``` + +The header will, by default, be the union of all keys present in the hashes, +sorted alphabetically. If a key is missing from a hash, `null` will be used as its value. +If you wish to specify the output format, you can pass a list of keys to be used +as the header: + +```bash +[{"name": "Lebron", "last": "James", "jersey": 23}, {"name": "James", "last": "Harden"}].tsv("\t", ["name", "last", "jersey", "additional_key"]) +name last jersey additional_key +Lebron James 23 null +James Harden null null +``` + ### unique() Returns an array with unique values: diff --git a/evaluator/evaluator_test.go b/evaluator/evaluator_test.go index c970b986..c322c477 100644 --- a/evaluator/evaluator_test.go +++ b/evaluator/evaluator_test.go @@ -949,6 +949,14 @@ c")`, []string{"a", "b", "c"}}, {`"a = 2; return 10" >> "test-source-vs-require.abs.ignore"; a = 1; x = require("test-source-vs-require.abs.ignore"); a`, 1}, {`"a = 2; return 10" >> "test-source-vs-require.abs.ignore"; a = 1; x = source("test-source-vs-require.abs.ignore"); x`, 10}, {`"a = 2; return 10" >> "test-source-vs-require.abs.ignore"; a = 1; x = require("test-source-vs-require.abs.ignore"); x`, 10}, + {`[[1,2,3], [2,3,4]].tsv()`, "1\t2\t3\n2\t3\t4"}, + {`[1].tsv()`, "tsv() must be called on an array of arrays or objects, such as [[1, 2, 3], [4, 5, 6]], '[1]' given"}, + {`[{"c": 3, "b": "hello"}, {"b": 20, "c": 0}].tsv()`, "b\tc\nhello\t3\n20\t0"}, + {`[[1,2,3], [2,3,4]].tsv(",")`, "1,2,3\n2,3,4"}, + {`[[1,2,3], [2]].tsv(",")`, "1,2,3\n2"}, + {`[[1,2,3], [2,3,4]].tsv("abc")`, "1a2a3\n2a3a4"}, + {`[[1,2,3], [2,3,4]].tsv("")`, "the separator argument to the tsv() function needs to be a valid character, '' given"}, + {`[{"c": 3, "b": "hello"}, {"b": 20, "c": 0}].tsv("\t", ["c", "b", "a"])`, 
"c\tb\ta\n3\thello\tnull\n0\t20\tnull"}, } for _, tt := range tests { evaluated := testEval(tt.input) diff --git a/evaluator/functions.go b/evaluator/functions.go index 5331c229..d8465c53 100644 --- a/evaluator/functions.go +++ b/evaluator/functions.go @@ -3,6 +3,7 @@ package evaluator import ( "bufio" "crypto/rand" + "encoding/csv" "fmt" "io/ioutil" "math" @@ -335,10 +336,16 @@ func getFns() map[string]*object.Builtin { Types: []string{object.STRING_OBJ}, Fn: execFn, }, + // eval(code) -- evaluates code in the context of the current ABS environment "eval": &object.Builtin{ Types: []string{object.STRING_OBJ}, Fn: evalFn, }, + // tsv([[1,2,3,4], [5,6,7,8]]) -- converts an array into a TSV string + "tsv": &object.Builtin{ + Types: []string{object.ARRAY_OBJ}, + Fn: tsvFn, + }, } } @@ -1672,6 +1679,138 @@ func evalFn(tok token.Token, env *object.Environment, args ...object.Object) obj return evaluated } +// [[1,2], [3,4]].tsv() +// [{"a": 1, "b": 2}, {"b": 3, "c": 4}].tsv() +func tsvFn(tok token.Token, env *object.Environment, args ...object.Object) object.Object { + // all arguments were passed + if len(args) == 3 { + err := validateArgs(tok, "tsv", args, 3, [][]string{{object.ARRAY_OBJ}, {object.STRING_OBJ}, {object.ARRAY_OBJ}}) + if err != nil { + return err + } + } + + // If no header was passed, let's set it to empty list by default + if len(args) == 2 { + err := validateArgs(tok, "tsv", args, 2, [][]string{{object.ARRAY_OBJ}, {object.STRING_OBJ}}) + if err != nil { + return err + } + args = append(args, &object.Array{Elements: []object.Object{}}) + } + + // If no separator and header was passed, let's set them to tab and empty list by default + if len(args) == 1 { + err := validateArgs(tok, "tsv", args, 1, [][]string{{object.ARRAY_OBJ}}) + if err != nil { + return err + } + args = append(args, &object.String{Value: "\t"}) + args = append(args, &object.Array{Elements: []object.Object{}}) + } + + array := args[0].(*object.Array) + separator := 
args[1].(*object.String).Value + + if len(separator) < 1 { + return newError(tok, "the separator argument to the tsv() function needs to be a valid character, '%s' given", separator) + } + // the final outut + out := &strings.Builder{} + tsv := csv.NewWriter(out) + tsv.Comma = rune(separator[0]) + + // whether our array is made of ALL arrays or ALL hashes + var isArray bool + var isHash bool + homogeneous := array.Homogeneous() + + if len(array.Elements) > 0 { + _, isArray = array.Elements[0].(*object.Array) + _, isHash = array.Elements[0].(*object.Hash) + } + + // if the array is not homogeneous, we cannot process it + if !homogeneous || (!isArray && !isHash) { + return newError(tok, "tsv() must be called on an array of arrays or objects, such as [[1, 2, 3], [4, 5, 6]], '%s' given as argument", array.Inspect()) + } + + headerObj := args[2].(*object.Array) + header := []string{} + + if len(headerObj.Elements) > 0 { + for _, v := range headerObj.Elements { + header = append(header, v.Inspect()) + } + } else if isHash { + // if our array is made of hashes, we will include a header in + // our TSV output, made of all possible keys found in every object + for _, rows := range array.Elements { + for _, pair := range rows.(*object.Hash).Pairs { + header = append(header, pair.Key.Inspect()) + } + } + + // When no header is provided, we will simply + // use the list of keys from all object, alphabetically + // sorted + header = util.UniqueStrings(header) + sort.Strings(header) + } + + if len(header) > 0 { + err := tsv.Write(header) + + if err != nil { + return newError(tok, err.Error()) + } + } + + for _, row := range array.Elements { + // Row values + values := []string{} + + // In the case of an array, creating the row is fairly + // straightforward: we loop through the elements and extract + // their value + if isArray { + for _, element := range row.(*object.Array).Elements { + values = append(values, element.Inspect()) + } + + } + + // In case of an hash, we want to 
extract values based on + // the header. If a key is not present in an hash, we will + // simply set it to null + if isHash { + for _, key := range header { + pair, ok := row.(*object.Hash).GetPair(key) + var value object.Object + + if ok { + value = pair.Value + } else { + value = NULL + } + + values = append(values, value.Inspect()) + } + } + + // Add the row to the final output, by concatenating + // it with the given separator + err := tsv.Write(values) + + if err != nil { + return newError(tok, err.Error()) + } + } + + tsv.Flush() + return &object.String{Value: strings.TrimSpace(out.String())} +} + func execFn(tok token.Token, env *object.Environment, args ...object.Object) object.Object { err := validateArgs(tok, "exec", args, 1, [][]string{{object.STRING_OBJ}}) if err != nil { diff --git a/object/object.go b/object/object.go index 63500df0..0d964c85 100644 --- a/object/object.go +++ b/object/object.go @@ -304,6 +304,9 @@ func (ao *Array) Next() (Object, Object) { func (ao *Array) Reset() { ao.position = 0 } + +// Homogeneous returns whether the array is homogeneous, +// meaning all of its elements are of a single type func (ao *Array) Homogeneous() bool { if ao.Empty() { return true diff --git a/util/util.go b/util/util.go index 1e28d0d7..8898f476 100644 --- a/util/util.go +++ b/util/util.go @@ -86,3 +86,17 @@ func InterpolateStringVars(str string, env *object.Environment) string { }) return str } + +// UniqueStrings takes an input list of strings +// and returns a version without duplicate values +func UniqueStrings(slice []string) []string { + keys := make(map[string]bool) + list := []string{} + for _, entry := range slice { + if _, value := keys[entry]; !value { + keys[entry] = true + list = append(list, entry) + } + } + return list +}