Skip to content

Commit

Permalink
Merge branch 'master' into k8s-add-external-ports
Browse files Browse the repository at this point in the history
  • Loading branch information
godber authored Apr 12, 2021
2 parents bc8e0aa + 7113517 commit 379d4a5
Show file tree
Hide file tree
Showing 85 changed files with 1,750 additions and 972 deletions.
8 changes: 4 additions & 4 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
"debug": "^4.3.1",
"ms": "^2.1.3",
"node-notifier": "^9.0.1",
"typescript": "~4.2.3"
"typescript": "~4.2.4"
},
"dependencies": {},
"devDependencies": {
Expand All @@ -46,13 +46,13 @@
"@types/lodash": "^4.14.168",
"@types/node": "^14.14.37",
"@types/uuid": "^8.3.0",
"eslint": "^7.23.0",
"eslint": "^7.24.0",
"jest": "^26.6.3",
"jest-extended": "^0.11.5",
"jest-watch-typeahead": "^0.6.0",
"jest-watch-typeahead": "^0.6.2",
"node-notifier": "^9.0.1",
"ts-jest": "^26.5.4",
"typescript": "~4.2.3"
"typescript": "~4.2.4"
},
"engines": {
"node": ">=10.16.0",
Expand Down
12 changes: 12 additions & 0 deletions packages/data-mate/bench/fixtures/data.dfjson

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion packages/data-mate/bench/fixtures/data.json

Large diffs are not rendered by default.

51 changes: 43 additions & 8 deletions packages/data-mate/bench/generate-data.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,12 @@ const { times, random } = require('@terascope/utils');
const { FieldType } = require('@terascope/types');
const fs = require('fs');
const path = require('path');
const shuffle = require('lodash/shuffle');
const Chance = require('chance');
const util = require('util');
const stream = require('stream');
const { once } = require('events');
const { DataFrame } = require('./src');

const chance = new Chance();

Expand Down Expand Up @@ -64,9 +69,9 @@ const dataTypeConfig = {
};

const maxInt = (2 ** 31) - 1;
const numRecords = 1800;
const numRecords = 1000; // this will be doubled
const year = new Date().getFullYear();
const records = times(numRecords, () => {
let records = times(numRecords, () => {
const age = chance.age();
return {
_key: chance.guid({ version: 4 }),
Expand Down Expand Up @@ -96,8 +101,7 @@ const records = times(numRecords, () => {
});

// add some duplicates
records.splice(500, 0, ...records.slice(100, 200));
records.splice(1300, 0, ...records.slice(500, 600));
records = shuffle(records.concat(records));

function randArrSize(fn, arg) {
return times(random(0, 5), () => randNull(fn, arg));
Expand All @@ -117,7 +121,38 @@ console.dir({
maxArrayLength: 1,
depth: 5
});
fs.writeFileSync(path.join(__dirname, 'fixtures/data.json'), JSON.stringify({
config: dataTypeConfig,
data: records
}));
(async function writeRow() {
console.time('write row');
await new Promise((resolve, reject) => {
const data = JSON.stringify({
config: dataTypeConfig,
data: records
});
fs.writeFile(path.join(__dirname, 'fixtures/data.json'), data, (err) => {
if (err) reject();
else resolve();
});
});
console.timeEnd('write row');
}());

const finished = util.promisify(stream.finished);

(async function writeColumnStream() {
const frame = DataFrame.fromJSON(dataTypeConfig, records);

const writable = fs.createWriteStream(
path.join(__dirname, 'fixtures/data.dfjson'),
{ encoding: 'utf8' }
);
console.time('write column stream');
for await (const chunk of frame.serializeIterator()) {
if (!writable.write(`${chunk}\n`)) { // (B)
// Handle back pressure
await once(writable, 'drain');
}
}
writable.end();
await finished(writable);
console.timeEnd('write column stream');
}());
133 changes: 133 additions & 0 deletions packages/data-mate/bench/serialize-perf-test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
/* eslint-disable no-console */

'use strict';

const { pDelay, toHumanTime } = require('@terascope/utils');
const MultiMap = require('mnemonist/multi-map');
const fs = require('fs');
const path = require('path');
const { DataFrame } = require('./src');

function readFile(fileName) {
const filePath = fs.existsSync(path.join(__dirname, 'fixtures', `.local.${fileName}`))
? path.join(__dirname, 'fixtures', `.local.${fileName}`)
: path.join(__dirname, 'fixtures', fileName);

return async function _readFile() {
console.time(`readFile ${fileName}`);
try {
return await new Promise((resolve, reject) => {
fs.readFile(filePath, { encoding: 'utf8' }, (err, buf) => {
if (err) {
reject(err);
return;
}
resolve(buf);
});
});
} finally {
console.timeEnd(`readFile ${fileName}`);
}
};
}

function readFileStream(fileName) {
const filePath = fs.existsSync(path.join(__dirname, 'fixtures', `.local.${fileName}`))
? path.join(__dirname, 'fixtures', `.local.${fileName}`)
: path.join(__dirname, 'fixtures', fileName);

return async function* _readFile() {
console.time(`readFileStream ${fileName}`);
try {
const stream = fs.createReadStream(filePath, { encoding: 'utf8' });
let chunks = '';
for await (const chunk of stream) {
const parts = chunk.split('\n');
if (parts.length === 0) {
// do nothing
} else if (parts.length === 1) {
chunks += chunk;
} else {
for (let i = 0; i < parts.length; i++) {
if (i === (parts.length - 1)) {
chunks += parts[i];
} else {
yield chunks + parts[i];
chunks = '';
}
}
}
}
} finally {
console.timeEnd(`readFileStream ${fileName}`);
}
};
}

async function fromJSON(buf) {
console.time('fromJSON');
try {
const { data, config } = JSON.parse(buf);
return DataFrame.fromJSON(config, data);
} finally {
console.timeEnd('fromJSON');
}
}

async function deserialize(buf) {
console.time('deserialize');
try {
return await DataFrame.deserialize(buf);
} finally {
console.timeEnd('deserialize');
}
}

async function deserializeStream(iterator) {
console.time('deserializeStream');
try {
return await DataFrame.deserializeIterator(iterator);
} finally {
console.timeEnd('deserializeStream');
}
}

async function runTest(times) {
let start;
return Promise.resolve()
.then(() => { start = Date.now(); })
.then(readFile('data.json'))
.then(fromJSON)
.then(() => times.set('row', Date.now() - start))
.then(() => pDelay(100))
.then(() => { start = Date.now(); })
.then(readFile('data.dfjson'))
.then(deserialize)
.then(() => times.set('column', Date.now() - start))
.then(() => { start = Date.now(); })
.then(readFileStream('data.dfjson'))
.then(deserializeStream)
.then(() => times.set('column stream', Date.now() - start));
}

(async function runTests() {
const times = new MultiMap();
for (let i = 0; i < 3; i++) {
await runTest(times);
}
for (const [group, groupTimes] of times.associations()) {
let min;
let max;
let sum = 0;
for (const time of groupTimes) {
sum += time;
if (max == null || time > max) max = time;
if (min == null || time < min) min = time;
}
const avg = sum / groupTimes.length;
console.log(`[${group}]
avg: ${toHumanTime(avg)}
min: ${toHumanTime(min)}
max: ${toHumanTime(max)}`);
}
}());
14 changes: 7 additions & 7 deletions packages/data-mate/package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "@terascope/data-mate",
"displayName": "Data-Mate",
"version": "0.26.0",
"version": "0.27.1",
"description": "Library of data validations/transformations",
"homepage": "https://github.com/terascope/teraslice/tree/master/packages/data-mate#readme",
"repository": {
Expand Down Expand Up @@ -29,17 +29,17 @@
"test:watch": "ts-scripts test --watch . --"
},
"dependencies": {
"@terascope/data-types": "^0.27.6",
"@terascope/types": "^0.7.2",
"@terascope/utils": "^0.36.6",
"@terascope/data-types": "^0.28.0",
"@terascope/types": "^0.8.0",
"@terascope/utils": "^0.37.0",
"@turf/bbox": "^6.2.0",
"@turf/bbox-polygon": "^6.3.0",
"@turf/boolean-point-in-polygon": "^6.2.0",
"@turf/circle": "^6.3.0",
"@turf/clean-coords": "^6.2.0",
"@turf/helpers": "^6.2.0",
"awesome-phonenumber": "^2.48.0",
"date-fns": "^2.19.0",
"awesome-phonenumber": "^2.49.0",
"date-fns": "^2.20.1",
"ip-bigint": "^3.0.3",
"ip6addr": "^0.2.3",
"ipaddr.js": "^2.0.0",
Expand All @@ -50,7 +50,7 @@
"uuid": "^8.3.2",
"valid-url": "^1.0.9",
"validator": "^13.5.2",
"xlucene-parser": "^0.34.5"
"xlucene-parser": "^0.35.0"
},
"devDependencies": {
"@types/ip6addr": "^0.2.2",
Expand Down
27 changes: 4 additions & 23 deletions packages/data-mate/src/builder/Builder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@ import {
freezeObject, ReadableData, WritableData
} from '../core';
import {
ListVector,
Vector, VectorType
ListVector, Vector, VectorType
} from '../vector';

/**
Expand Down Expand Up @@ -89,7 +88,9 @@ export abstract class Builder<T = unknown> {
this.name = options.name;
this.type = type;
this.config = freezeObject(options.config);
this.childConfig = options.childConfig ? freezeObject(options.childConfig) : undefined;
this.childConfig = options.childConfig
? freezeObject(options.childConfig)
: undefined;
this.data = data;
this.currentIndex = 0;
}
Expand Down Expand Up @@ -182,26 +183,6 @@ export abstract class Builder<T = unknown> {
this.currentIndex = 0;
return vector;
}

[Symbol.for('nodejs.util.inspect.custom')](): any {
const proxy = {
name: this.name,
type: this.type,
config: this.config,
childConfig: this.childConfig,
size: this.data.size,
currentIndex: this.currentIndex,
values: this.data.values
};

// Trick so that node displays the name of the constructor
Object.defineProperty(proxy, 'constructor', {
value: this.constructor,
enumerable: false
});

return proxy;
}
}

/**
Expand Down
39 changes: 22 additions & 17 deletions packages/data-mate/src/builder/types/DateBuilder.ts
Original file line number Diff line number Diff line change
@@ -1,30 +1,35 @@
import { DateValue, WritableData } from '../../core';
import {
getTypeOf, isNumber, isString, isValidDateInstance, makeISODate
} from '@terascope/utils';
import { DateFormat } from '@terascope/types';
import { WritableData } from '../../core';
import { VectorType } from '../../vector';
import { BuilderOptions } from '../Builder';
import { BuilderWithCache } from '../BuilderWithCache';

export class DateBuilder extends BuilderWithCache<DateValue> {
referenceDate = new Date();
import { Builder, BuilderOptions } from '../Builder';

export class DateBuilder extends Builder<number|string> {
constructor(
data: WritableData<DateValue>,
data: WritableData<number>,
options: BuilderOptions
) {
super(VectorType.Date, data, options);
}

_valueFrom(value: unknown): DateValue {
// FIXME this should validate the format is correct
if (value instanceof DateValue) return value;
_valueFrom(value: unknown): number|string {
if (value instanceof Date) {
if (isValidDateInstance(value)) return value.toISOString();

throw new TypeError(`Expected ${value} (${getTypeOf(value)}) to be a valid date instance`);
}

if (!isString(value) && !isNumber(value)) {
throw new TypeError(`Expected ${value} (${getTypeOf(value)}) to be a valid date`);
}

if (this.config.format) {
return DateValue.fromValueWithFormat(
value,
this.config.format,
this.referenceDate
);
// ensure we stored the iso 8601 format where possible
if (this.config.format === DateFormat.iso_8601 || !this.config.format) {
return makeISODate(value);
}

return DateValue.fromValue(value as any);
return value;
}
}
Loading

0 comments on commit 379d4a5

Please sign in to comment.