Skip to content

Commit

Permalink
feat: implement tools to convert from csv to json
Browse files Browse the repository at this point in the history
issue: #237
  • Loading branch information
CodeBear801 committed Mar 20, 2020
1 parent 44f415a commit 413edeb
Show file tree
Hide file tree
Showing 2 changed files with 106 additions and 0 deletions.
13 changes: 13 additions & 0 deletions integration/cmd/csv-to-json/flags.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package main

import "flag"

// flags holds the values of the command line options of this tool.
// All fields start out as empty strings until the flag package fills them in.
var flags = struct {
	// inputPath is the location of the csv file to read.
	inputPath string
	// outputPath is the location of the json file to write.
	outputPath string
}{}

// init registers the -i (input csv path, empty by default) and -o (output
// json path, "output.json" by default) options on the process-wide flag set.
func init() {
	cmdline := flag.CommandLine

	cmdline.StringVar(&flags.inputPath, "i", "", "path for input file in csv format")
	cmdline.StringVar(&flags.outputPath, "o", "output.json", "path for output file in json format")
}
93 changes: 93 additions & 0 deletions integration/cmd/csv-to-json/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
package main

import (
"bytes"
"encoding/csv"
"encoding/json"
"flag"
"io/ioutil"
"os"
"strconv"
"strings"

"github.com/golang/glog"
)

func main() {
flag.Parse()
ETL()
}

// ETL extracts the records of the csv file named by flags.inputPath, converts
// them to a json array of objects and writes that array to flags.outputPath.
//
// The first csv record is the header: its fields become the keys of every
// generated object. Each following record becomes one object. A value is
// emitted as a json number when it is a valid json number literal that fits
// in a float64, as a json boolean when it spells "true" or "false" in any
// letter case, and as an escaped json string otherwise.
//
// Every failure (input cannot be opened or parsed, input is empty, output
// cannot be written) is reported through glog.Fatalf.
//
// Optimization for future: ETL could be a generic framework.
//   - The extraction part accepts input through many kinds of readers (json,
//     csv, etc) and puts the data into a channel of strings.
//   - The transformation part reads the extraction's result, applies user
//     defined functions and outputs the result to another channel. There might
//     be tricky logic in this step when the input is not separated by lines:
//     one input string might hold the last part of object 1 followed by the
//     first part of object 2, so a buffer is needed to handle this.
//   - The loader part reads data from the channel and writes the result in the
//     target format.
func ETL() {
	// extract: load data from csv
	csvFile, err := os.Open(flags.inputPath)
	if err != nil {
		glog.Fatalf("While open file %s, met error %v", flags.inputPath, err)
	}
	defer csvFile.Close()

	// With the reader's default settings every record must have as many
	// fields as the first one, otherwise ReadAll fails. That guarantee is
	// what makes the header[j] lookup below safe.
	content, err := csv.NewReader(csvFile).ReadAll()
	if err != nil {
		glog.Fatalf("While parsing csv file %s, met error %v", flags.inputPath, err)
	}
	glog.Infof("Finish loading file of %s, it contains %d line of data\n", flags.inputPath, len(content))

	if len(content) < 1 {
		glog.Fatalf("No content in given file %s\n", flags.inputPath)
	}

	// transformation: convert csv to json format
	header, rows := content[0], content[1:]

	var buffer bytes.Buffer
	buffer.WriteString("[")
	for i, row := range rows {
		if i > 0 {
			buffer.WriteString(",")
		}
		buffer.WriteString("{")
		for j, element := range row {
			if j > 0 {
				buffer.WriteString(",")
			}
			buffer.WriteString(quoteJSON(header[j]))
			buffer.WriteString(":")
			buffer.WriteString(jsonValue(element))
		}
		buffer.WriteString("}")
	}
	buffer.WriteString("]")
	glog.Info("Finish converting from csv to internal json string\n")

	// Load: save content to target file
	if err := ioutil.WriteFile(flags.outputPath, buffer.Bytes(), os.FileMode(0644)); err != nil {
		glog.Fatalf("While writing result to file %s, met error %v", flags.outputPath, err)
	}
	glog.Infof("Finish generating target file %s\n", flags.outputPath)
}

// jsonValue returns the json token that represents one csv field.
//
// A field is written verbatim as a number only when strconv.ParseFloat
// accepts it AND it is a valid json literal. ParseFloat alone also accepts
// spellings such as "NaN", "Inf", "+1", ".5", "007" or hexadecimal floats,
// none of which are legal json, so those are kept as strings instead.
// Likewise only the full words "true"/"false" become booleans; the
// single-letter forms accepted by strconv.ParseBool have no json equivalent
// once lowercased and are kept as strings.
func jsonValue(element string) string {
	if _, err := strconv.ParseFloat(element, 64); err == nil && json.Valid([]byte(element)) {
		return element
	}

	switch lowered := strings.ToLower(element); lowered {
	case "true", "false":
		return lowered
	}

	return quoteJSON(element)
}

// quoteJSON returns s as a json string literal, including the surrounding
// double quotes, with quotes, backslashes and control characters escaped.
// HTML-sensitive characters such as '<' and '&' are left as they are.
func quoteJSON(s string) string {
	var quoted bytes.Buffer
	encoder := json.NewEncoder(&quoted)
	encoder.SetEscapeHTML(false)
	// Encoding a plain string into an in-memory buffer cannot fail: there is
	// no unsupported type involved and bytes.Buffer writes do not return errors.
	_ = encoder.Encode(s)

	// Encode terminates every value with a newline that is not part of it.
	return strings.TrimSuffix(quoted.String(), "\n")
}

0 comments on commit 413edeb

Please sign in to comment.