Skip to content

Commit

Permalink
array.tsv(), closes #279
Browse files Browse the repository at this point in the history
Arrays can now easily be formatted as TSV / CSV content:

```
array.tsv()
array.tsv(",")
```

Here are some samples:

```
⧐  [["LeBron", "James"], ["James", "Harden"]].tsv()
LeBron	James
James	Harden

⧐  [{"name": "Lebron", "last": "James", "jersey": 23}, {"name": "James", "last": "Harden"}].tsv("\t", ["name", "last", "jersey", "additional_key"])
name	last	jersey	additional_key
Lebron	James	23	null
James	Harden	null	null
```

There is some complexity in this function due to the fact that
we have to support custom headers, as well as the fact that we
can format both arrays of arrays and arrays of hashes.
  • Loading branch information
odino committed Sep 15, 2019
1 parent 430e40a commit 15499a4
Show file tree
Hide file tree
Showing 5 changed files with 205 additions and 0 deletions.
41 changes: 41 additions & 0 deletions docs/types/array.md
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,47 @@ Sums the elements of the array. Only supported on arrays of numbers:
[1, 1, 1].sum() # 3
```
### tsv([separator], [header])
Formats the array into TSV:
``` bash
[["LeBron", "James"], ["James", "Harden"]].tsv()
LeBron James
James Harden
```
You can also specify the separator to be used if you
prefer not to use tabs:
``` bash
[["LeBron", "James"], ["James", "Harden"]].tsv(",")
LeBron,James
James,Harden
```
The input array needs to be an array of arrays or hashes. If
you use hashes, their keys will be used as the heading of the TSV:
```bash
[{"name": "Lebron", "last": "James", "jersey": 23}, {"name": "James", "last": "Harden"}].tsv()
jersey last name
23 James Lebron
null Harden James
```
The heading will, by default, be a combination of all keys present in the hashes,
sorted alphabetically. If a key is missing in a hash, `null` will be used as its value.
If you wish to specify the output format, you can pass a list of keys to be used
as header:
```bash
[{"name": "Lebron", "last": "James", "jersey": 23}, {"name": "James", "last": "Harden"}].tsv("\t", ["name", "last", "jersey", "additional_key"])
name last jersey additional_key
Lebron James 23 null
James Harden null null
```
### unique()
Returns an array with unique values:
Expand Down
8 changes: 8 additions & 0 deletions evaluator/evaluator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -949,6 +949,14 @@ c")`, []string{"a", "b", "c"}},
{`"a = 2; return 10" >> "test-source-vs-require.abs.ignore"; a = 1; x = require("test-source-vs-require.abs.ignore"); a`, 1},
{`"a = 2; return 10" >> "test-source-vs-require.abs.ignore"; a = 1; x = source("test-source-vs-require.abs.ignore"); x`, 10},
{`"a = 2; return 10" >> "test-source-vs-require.abs.ignore"; a = 1; x = require("test-source-vs-require.abs.ignore"); x`, 10},
{`[[1,2,3], [2,3,4]].tsv()`, "1\t2\t3\n2\t3\t4"},
{`[1].tsv()`, "tsv() must be called on an array of arrays or objects, such as [[1, 2, 3], [4, 5, 6]], '[1]' given"},
{`[{"c": 3, "b": "hello"}, {"b": 20, "c": 0}].tsv()`, "b\tc\nhello\t3\n20\t0"},
{`[[1,2,3], [2,3,4]].tsv(",")`, "1,2,3\n2,3,4"},
{`[[1,2,3], [2]].tsv(",")`, "1,2,3\n2"},
{`[[1,2,3], [2,3,4]].tsv("abc")`, "1a2a3\n2a3a4"},
{`[[1,2,3], [2,3,4]].tsv("")`, "the separator argument to the tsv() function needs to be a valid character, '' given"},
{`[{"c": 3, "b": "hello"}, {"b": 20, "c": 0}].tsv("\t", ["c", "b", "a"])`, "c\tb\ta\n3\thello\tnull\n0\t20\tnull"},
}
for _, tt := range tests {
evaluated := testEval(tt.input)
Expand Down
139 changes: 139 additions & 0 deletions evaluator/functions.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package evaluator
import (
"bufio"
"crypto/rand"
"encoding/csv"
"fmt"
"io/ioutil"
"math"
Expand Down Expand Up @@ -335,10 +336,16 @@ func getFns() map[string]*object.Builtin {
Types: []string{object.STRING_OBJ},
Fn: execFn,
},
// eval(code) -- evaluates code in the context of the current ABS environment
"eval": &object.Builtin{
Types: []string{object.STRING_OBJ},
Fn: evalFn,
},
// tsv([[1,2,3,4], [5,6,7,8]]) -- converts an array into a TSV string
"tsv": &object.Builtin{
Types: []string{object.ARRAY_OBJ},
Fn: tsvFn,
},
}
}

Expand Down Expand Up @@ -1672,6 +1679,138 @@ func evalFn(tok token.Token, env *object.Environment, args ...object.Object) obj
return evaluated
}

// tsvFn formats an array of arrays, or an array of hashes, as TSV / CSV text.
//
//	[[1,2], [3,4]].tsv()
//	[{"a": 1, "b": 2}, {"b": 3, "c": 4}].tsv()
//
// Arguments:
//   - args[0]: the array to format; every element must be an array, or
//     every element must be a hash.
//   - args[1] (optional): the separator string; only its first character is
//     used. Defaults to a tab.
//   - args[2] (optional): an array of keys to use as the header row. When it
//     is empty and the input is made of hashes, the header is the sorted
//     union of all keys found in the hashes.
//
// It returns an *object.String holding the formatted rows, without the final
// line terminator, or an error object when validation or writing fails.
func tsvFn(tok token.Token, env *object.Environment, args ...object.Object) object.Object {
	switch len(args) {
	case 3:
		// all arguments were passed
		err := validateArgs(tok, "tsv", args, 3, [][]string{{object.ARRAY_OBJ}, {object.STRING_OBJ}, {object.ARRAY_OBJ}})
		if err != nil {
			return err
		}
	case 2:
		// If no header was passed, let's set it to an empty list by default
		err := validateArgs(tok, "tsv", args, 2, [][]string{{object.ARRAY_OBJ}, {object.STRING_OBJ}})
		if err != nil {
			return err
		}
		args = append(args, &object.Array{Elements: []object.Object{}})
	case 1:
		// If no separator and header were passed, let's set them to tab
		// and empty list by default
		err := validateArgs(tok, "tsv", args, 1, [][]string{{object.ARRAY_OBJ}})
		if err != nil {
			return err
		}
		args = append(args, &object.String{Value: "\t"})
		args = append(args, &object.Array{Elements: []object.Object{}})
	}

	array := args[0].(*object.Array)
	separator := args[1].(*object.String).Value

	if len(separator) < 1 {
		return newError(tok, "the separator argument to the tsv() function needs to be a valid character, '%s' given", separator)
	}

	// the final output
	out := &strings.Builder{}
	tsv := csv.NewWriter(out)
	// Use the first character of the separator, not its first byte:
	// indexing the string would split a multi-byte character (e.g. "→")
	// and silently produce a different delimiter. An invalid UTF-8
	// separator decodes to U+FFFD, which the csv writer rejects on Write.
	tsv.Comma = []rune(separator)[0]

	// whether our array is made of ALL arrays or ALL hashes
	var isArray bool
	var isHash bool
	homogeneous := array.Homogeneous()

	if len(array.Elements) > 0 {
		_, isArray = array.Elements[0].(*object.Array)
		_, isHash = array.Elements[0].(*object.Hash)
	}

	// if the array is not homogeneous, we cannot process it
	if !homogeneous || (!isArray && !isHash) {
		return newError(tok, "tsv() must be called on an array of arrays or objects, such as [[1, 2, 3], [4, 5, 6]], '%s' given as argument", array.Inspect())
	}

	headerObj := args[2].(*object.Array)
	header := []string{}

	if len(headerObj.Elements) > 0 {
		// An explicit header was given: use it verbatim, in the given order
		for _, v := range headerObj.Elements {
			header = append(header, v.Inspect())
		}
	} else if isHash {
		// if our array is made of hashes, we will include a header in
		// our TSV output, made of all possible keys found in every object
		for _, rows := range array.Elements {
			for _, pair := range rows.(*object.Hash).Pairs {
				header = append(header, pair.Key.Inspect())
			}
		}

		// When no header is provided, we will simply
		// use the list of keys from all objects, alphabetically
		// sorted
		header = util.UniqueStrings(header)
		sort.Strings(header)
	}

	if len(header) > 0 {
		err := tsv.Write(header)

		if err != nil {
			return newError(tok, err.Error())
		}
	}

	for _, row := range array.Elements {
		// Row values
		values := []string{}

		// In the case of an array, creating the row is fairly
		// straightforward: we loop through the elements and extract
		// their value
		if isArray {
			for _, element := range row.(*object.Array).Elements {
				values = append(values, element.Inspect())
			}
		}

		// In case of a hash, we want to extract values based on
		// the header. If a key is not present in a hash, we will
		// simply set it to null
		if isHash {
			for _, key := range header {
				pair, ok := row.(*object.Hash).GetPair(key)
				var value object.Object

				if ok {
					value = pair.Value
				} else {
					value = NULL
				}

				values = append(values, value.Inspect())
			}
		}

		// Add the row to the final output, letting the csv writer
		// join (and, where needed, quote) the values
		err := tsv.Write(values)

		if err != nil {
			return newError(tok, err.Error())
		}
	}

	tsv.Flush()

	// Flush does not return an error itself; it has to be read back
	// from the writer explicitly.
	if err := tsv.Error(); err != nil {
		return newError(tok, err.Error())
	}

	// Only drop the final line terminator added by the writer. Trimming
	// all surrounding whitespace would also eat leading / trailing
	// separators when the separator is a tab or a space, losing empty
	// first / last columns.
	return &object.String{Value: strings.TrimSuffix(out.String(), "\n")}
}

func execFn(tok token.Token, env *object.Environment, args ...object.Object) object.Object {
err := validateArgs(tok, "exec", args, 1, [][]string{{object.STRING_OBJ}})
if err != nil {
Expand Down
3 changes: 3 additions & 0 deletions object/object.go
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,9 @@ func (ao *Array) Next() (Object, Object) {
func (ao *Array) Reset() {
// Put the array's internal position back to 0.
// NOTE(review): presumably this is the cursor advanced by Next() — confirm.
ao.position = 0
}

// Homogeneous returns whether the array is homogeneous,
// meaning all of its elements are of a single type
func (ao *Array) Homogeneous() bool {
if ao.Empty() {
return true
Expand Down
14 changes: 14 additions & 0 deletions util/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,17 @@ func InterpolateStringVars(str string, env *object.Environment) string {
})
return str
}

// UniqueStrings returns a new list containing each distinct string of
// slice exactly once, in order of first appearance. The result is never
// nil, even when slice is empty.
func UniqueStrings(slice []string) []string {
	seen := make(map[string]struct{})
	unique := make([]string, 0)

	for i := range slice {
		s := slice[i]

		// Already emitted once: skip every later occurrence.
		if _, dup := seen[s]; dup {
			continue
		}

		seen[s] = struct{}{}
		unique = append(unique, s)
	}

	return unique
}

0 comments on commit 15499a4

Please sign in to comment.