Skip to content

Commit

Permalink
Add TSV, PSV, anySV file support
Browse files Browse the repository at this point in the history
  • Loading branch information
koresar committed Aug 4, 2021
1 parent 298e1c3 commit adb124b
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 22 deletions.
18 changes: 12 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
# lil-csv

Mini 1k module for CSV file manipulations
Mini 1k module for CSV, TSV, PSV file manipulations

- Parse CSV text to deep JSON objects.
- Customise each column parsing with your code.
- Serialise deep JSON objects to CSV.
- Rename CSV headers and object keys on the fly.
- Simply generate CSV from arrays of strings.
- Parse CSV to simple arrays of strings.
- TSV (tab-separated values), PSV (pipe-separated values), and other-separated values.

## Usage

Expand Down Expand Up @@ -275,11 +276,13 @@ Acme Inc,true,2021-11-22,1000150.10`

## API

### `parse(text, [options = { header: true, escapeChar: "\\" }])`
### `parse(text, [options = { header: true, delimiter: ",", quoteChar: '"', escapeChar: "\\" }])`

- `text` - String, the string to parse.
- `options` - Object, optional parsing settings.
- `options.escapeChar` - String character, the escape character used within that CSV.
- `options.delimiter` - String character, value separator. E.g. `\t` for TSV, `|` for PSV, etc. Default is comma: `,`.
- `options.quoteChar` - String character. Which char to use to wrap strings. Default is double quotes: `"`.
- `options.escapeChar` - String character, the escape character used within that file. Default is backslash: `\`.
- `options.header` - Boolean, or Array of string, or Object. Default is `true`.
- Boolean
- `true` - create JSON objects from CSV rows. Assumes the first row of the text is a header; its values are used as the object keys.
Expand All @@ -294,12 +297,15 @@ Acme Inc,true,2021-11-22,1000150.10`
- `header[].newName` - rename CSV header. E.g. `"User First Name": "user.firstName"`
- key is `"*"`, value is used as a default column parser for unknown columns.

### `generate(rows, [options = { header: true, escapeChar: "\\", lineTerminator: "\n" }])`
### `generate(rows, [options = { header: true, delimiter: ",", quoteChar: '"', escapeChar: "\\", wrapStrings: false, lineTerminator: "\n" }])`

- `rows` - array of arrays. The data to generate the CSV from. Each row must be either an array or an object.
- `options` - Object, optional settings.
- `options.escapeChar` - String character, the escape character used within that CSV.
- `options.lineTerminator` - String character, the new line character used within that CSV.
- `options.delimiter` - String character, value separator. E.g. `\t` for TSV, `|` for PSV, etc. Default is comma: `,`.
- `options.quoteChar` - String character. Which char to use to wrap strings. Default is double quotes: `"`.
- `options.escapeChar` - String character, the escape character used within that file. Default is backslash: `\`.
- `options.wrapStrings` - Boolean, set it to `true` if all string cells must be wrapped with the `quoteChar`. Default is `false`.
- `options.lineTerminator` - String character, the new line character used within that file.
- `options.header` - Boolean, or Array of string, or Object. Default is `true`.
- Boolean
- `true` - autodetect column names (header) from the `rows`. If `rows` data are objects, then keys would be the column names. If `rows` are arrays, then the first row is assumed to be the header.
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "lil-csv",
"version": "1.3.1",
"description": "Mini 1k module for CSV file manipulations",
"description": "Mini 1k module for CSV, TSV, PSV file manipulations",
"source": "src/lil-csv.js",
"main": "dist/lil-csv.js",
"exports": "./dist/lil-csv.modern.js",
Expand Down
41 changes: 28 additions & 13 deletions src/lil-csv.js
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,12 @@ function keysDeep(obj, prefix) {
/**
* @param str {String} The CSV file contents.
* @param [header=true] {Boolean | String[] | Object.<string,String> | Object.<string,Function> | Object.<string,{[parse]:Function,[newName]:String}>}
* @param [escapeChar="\\"] {String}
* @param [delimiter=","] {String} specifies the character sequence which should separate fields (aka columns). Default = `","`. Examples: `"\t"` or `"|"`.
* @param [quoteChar='"'] {String} specifies a one-character string to use as the quoting character. Default = `\"`
* @param [escapeChar="\\"] {String} specifies a one-character string to use for escaping, mutually exclusive with `quoteChar`. Default: `"\\"`
* @return {Object[] | String[] | *[]} The parsed strings, objects, values of all kind(s).
*/
export function parse(str, { header = true, escapeChar = "\\" } = {}) {
export function parse(str, { header = true, delimiter = ",", quoteChar = '"', escapeChar = "\\" } = {}) {
let entries = [];
let quote = false; // 'true' means we're inside a quoted field

Expand All @@ -67,14 +69,14 @@ export function parse(str, { header = true, escapeChar = "\\" } = {}) {
}

// If it's just one quotation mark, begin/end quoted field
if (cc === '"') {
if (cc === quoteChar) {
quote = !quote;
continue;
}

if (!quote) {
// If it's the delimiter, move on to the next column
if (cc === ",") {
if (cc === delimiter) {
++col;
entries[row][col] = ""; // If line ends with comma we need to add an empty column to the row.
continue;
Expand Down Expand Up @@ -140,17 +142,30 @@ let valueToString = (v) => {

/**
* Generate CSV from your data (arrays or objects) to a string.
* The options are named using this standard: https://specs.frictionlessdata.io/csv-dialect/#specification
* @param rows {Object[] | String[]}
* @param [header=true] {Boolean | String[] | Object.<string,Boolean> | Object.<string,String> | Object.<string,Function> | Object.<string,{[stringify]:Function,[newName]:String}>}
* @param [lineTerminator="\n"] {String}
* @param [escapeChar="\\"] {String}
* @param [wrapStrings=false] {Boolean}
* @param [delimiter=","] {String} specifies the character sequence which should separate fields (aka columns). Default = `","`. Examples: `"\t"` or `"|"`.
* @param [quoteChar='"'] {String} specifies a one-character string to use as the quoting character. Default = `\"`
* @param [escapeChar="\\"] {String} specifies a one-character string to use for escaping, mutually exclusive with `quoteChar`. Default: `"\\"`
* @param [wrapStrings=false] {Boolean} specifies if all string cells must be wrapped with the `quoteChar`
* @param [lineTerminator="\n"] {String} specifies the character sequence which should terminate rows. Default = `"\n"`
* @return {String} The CSV file contents.
*/
export function generate(rows, { header = true, lineTerminator = "\n", escapeChar = "\\", wrapStrings = false } = {}) {
export function generate(
rows,
{
header = true,
delimiter = ",",
lineTerminator = "\n",
quoteChar = '"',
escapeChar = "\\",
wrapStrings = false,
} = {}
) {
let serialiseString = (v) => {
v = v.replace(/"/g, escapeChar + '"'); // Escape quote character
return wrapStrings || v.includes(",") ? '"' + v + '"' : v; // Add quotes if value has commas
v = v.replace(new RegExp(quoteChar, "g"), escapeChar + quoteChar); // Escape quote character
return wrapStrings || v.includes(delimiter) ? quoteChar + v + quoteChar : v; // Add quotes if value has commas
};

/**
Expand Down Expand Up @@ -197,7 +212,7 @@ export function generate(rows, { header = true, lineTerminator = "\n", escapeCha
let newHeader = dataHeader.newName || (isString(dataHeader) ? dataHeader : h);
return serialiseString(newHeader);
})
.join() + lineTerminator
.join(delimiter) + lineTerminator
: "";
return (
textHeader +
Expand All @@ -206,7 +221,7 @@ export function generate(rows, { header = true, lineTerminator = "\n", escapeCha
if (isArray(row)) {
if (detectedHeaders && row.length !== detectedHeaders.length)
throw new Error(`Each row array must have exactly ${detectedHeaders.length} items`);
return row.map((v) => serialiseString(valueToString(v))).join();
return row.map((v) => serialiseString(valueToString(v))).join(delimiter);
}
if (isObject(row)) {
if (!detectedHeaders) throw new Error("Unexpected row object");
Expand All @@ -218,7 +233,7 @@ export function generate(rows, { header = true, lineTerminator = "\n", escapeCha
if (!isFunction(stringify)) stringify = valueToString;
return serialiseString(valueToString(stringify(getDeep(row, h), row)));
})
.join();
.join(delimiter);
}
throw new Error(`Row ${i} must be either array or object`);
})
Expand Down
18 changes: 16 additions & 2 deletions test/lil-csv.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -44,14 +44,28 @@ describe("generate + parse", () => {
it("should work on fully customised options", () => {
const text = generate(
[
["my str", -123.123, false, new Date("2020-12-12"), "1999-09-09", {}, "whatever", ""],
["my ' \\' str", -123.123, false, new Date("2020-12-12"), "1999-09-09", {}, "whatever", ""],
[-1, "not number", "False", new Date("invalid date"), "bad DOB", [], "whatever", ""],
],
{
delimiter: "\t",
quoteChar: "'",
escapeChar: "/",
lineTerminator: "\r\n",
header: [`A string`, `num`, `bool`, `date`, `date of birth`, `bad data`, `skip this`, `skip this too`],
}
);
assert.strictEqual(
text,
`A string\tnum\tbool\tdate\tdate of birth\tbad data\tskip this\tskip this too\r\n` +
`my /' \\/' str\t-123.123\tfalse\t2020-12-12T00:00:00.000Z\t1999-09-09\t\twhatever\t\r\n` +
`-1\tnot number\tFalse\t\tbad DOB\t\twhatever\t`
);

const data = parse(text, {
delimiter: "\t",
quoteChar: "'",
escapeChar: "/",
header: {
"A string": "stringX",
num: { newName: "numberX", parse: (v) => (v && !Number.isNaN(Number(v)) ? Number(v) : "") },
Expand All @@ -67,7 +81,7 @@ describe("generate + parse", () => {
});
assert.deepStrictEqual(data, [
{
stringX: "my str",
stringX: "my ' \\' str",
numberX: -123.123,
booleanX: false,
dateX: new Date("2020-12-12T00:00:00.000Z"),
Expand Down

0 comments on commit adb124b

Please sign in to comment.