diff --git a/asset/asset.json b/asset/asset.json index 0355083f..fcdac78a 100644 --- a/asset/asset.json +++ b/asset/asset.json @@ -1,5 +1,5 @@ { "name": "standard", - "version": "1.1.0", + "version": "1.2.0", "description": "Teraslice standard processor asset bundle" } diff --git a/asset/package.json b/asset/package.json index 4275ae8e..2fb67e3a 100644 --- a/asset/package.json +++ b/asset/package.json @@ -1,7 +1,7 @@ { "name": "standard", "displayName": "Asset", - "version": "1.1.0", + "version": "1.2.0", "private": true, "description": "Teraslice standard processor asset bundle", "repository": { @@ -21,25 +21,31 @@ "test": "yarn --cwd ../ test" }, "dependencies": { - "@faker-js/faker": "^9.2.0", - "@terascope/job-components": "^1.5.1", - "@terascope/standard-asset-apis": "^1.0.2", - "@terascope/utils": "^1.3.2", - "@types/chance": "^1.1.4", - "@types/express": "^4.17.19", - "chance": "^1.1.12", - "express": "^4.21.1", - "mocker-data-generator": "^3.0.3", - "prom-client": "^15.1.2", - "randexp": "^0.5.3", - "short-unique-id": "^5.2.0", - "timsort": "^0.3.0", - "ts-transforms": "^1.3.2", - "tslib": "^2.8.1" + "@faker-js/faker": "~9.2.0", + "@terascope/data-mate": "~1.4.0", + "@terascope/job-components": "~1.6.0", + "@terascope/standard-asset-apis": "~1.0.2", + "@terascope/teraslice-state-storage": "~1.4.0", + "@terascope/utils": "~1.4.0", + "@types/chance": "~1.1.4", + "@types/express": "~4.17.19", + "chance": "~1.1.12", + "express": "~4.21.1", + "mocker-data-generator": "~3.0.3", + "ms": "^2.1.3", + "prom-client": "~15.1.2", + "randexp": "~0.5.3", + "short-unique-id": "~5.2.0", + "timsort": "~0.3.0", + "ts-transforms": "~1.4.0", + "tslib": "~2.8.1" }, "engines": { "node": ">=18.0.0", "yarn": ">=1.22.19" }, - "terascope": {} + "terascope": {}, + "devDependencies": { + "@types/ms": "^0.7.34" + } } diff --git a/asset/src/accumulate/processor.ts b/asset/src/accumulate/processor.ts index 0d226ae7..cb3ccc6d 100644 --- a/asset/src/accumulate/processor.ts +++ 
b/asset/src/accumulate/processor.ts @@ -1,6 +1,4 @@ -import { - BatchProcessor, DataEntity, Context -} from '@terascope/job-components'; +import { BatchProcessor, DataEntity, Context } from '@terascope/job-components'; import { ExecutionConfig } from '@terascope/types'; import { AccumulateConfig } from './interfaces.js'; import DataWindow from '../__lib/data-window.js'; @@ -25,9 +23,14 @@ export default class Accumulate extends BatchProcessor { } async onBatch(dataArray: DataEntity[]): Promise { - if (dataArray.length === 0) this.accumulator.emptySlice(); - else this.accumulator.add(dataArray); + if (dataArray.length === 0) { + this.accumulator.emptySlice(); + } else { + this.accumulator.add(dataArray); + } + let results: DataEntity[] = []; + if ((this.accumulator.readyToEmpty() || this.flushData) && this.accumulator.size > 0) { results = DataWindow.make( this.opConfig.data_window_key, diff --git a/asset/src/accumulate_by_key/processor.ts b/asset/src/accumulate_by_key/processor.ts index dee3710c..da4bda65 100644 --- a/asset/src/accumulate_by_key/processor.ts +++ b/asset/src/accumulate_by_key/processor.ts @@ -1,6 +1,4 @@ -import { - BatchProcessor, Context, DataEntity -} from '@terascope/job-components'; +import { BatchProcessor, Context, DataEntity } from '@terascope/job-components'; import { ExecutionConfig } from '@terascope/types'; import { AccumulateByKeyConfig } from './interfaces.js'; import AccumulatorByKey from '../__lib/accumulator-key.js'; @@ -27,10 +25,16 @@ export default class AccumulateByKey extends BatchProcessor { // on shutdown event return accumulated data - if (dataArray.length === 0) this.accumulator.emptySlice(); - else this.accumulator.add(dataArray); + if (dataArray.length === 0) { + this.accumulator.emptySlice(); + } else { + this.accumulator.add(dataArray); + } + + if (this.accumulator.readyToEmpty() || this.flushData) { + return this.accumulator.flush(); + } - if (this.accumulator.readyToEmpty() || this.flushData) return 
this.accumulator.flush(); return []; } } diff --git a/asset/src/add_key/processor.ts b/asset/src/add_key/processor.ts index 2c0c545a..e2a103a0 100644 --- a/asset/src/add_key/processor.ts +++ b/asset/src/add_key/processor.ts @@ -1,26 +1,12 @@ +import { BatchProcessor, DataEntity, AnyObject } from '@terascope/job-components'; import { - BatchProcessor, - DataEntity, - AnyObject -} from '@terascope/job-components'; -import { - get, - isObjectEntity, - isEmpty, - isString, - toNumber, - geoHash, - setPrecision, - isGeoShapePoint, - isPlainObject, + get, isObjectEntity, isEmpty, + isString, toNumber, geoHash, + setPrecision, isGeoShapePoint, isPlainObject, flatten } from '@terascope/utils'; -import { - GeoShapePoint, - GeoShapeType -} from '@terascope/types'; - -import crypto from 'crypto'; +import { GeoShapePoint, GeoShapeType } from '@terascope/types'; +import crypto from 'node:crypto'; import DataWindow from '../__lib/data-window.js'; export default class AddKey extends BatchProcessor { diff --git a/asset/src/add_short_id/processor.ts b/asset/src/add_short_id/processor.ts index ccc9227c..de0ba2ef 100644 --- a/asset/src/add_short_id/processor.ts +++ b/asset/src/add_short_id/processor.ts @@ -1,8 +1,4 @@ -import { - MapProcessor, - DataEntity, - Context -} from '@terascope/job-components'; +import { MapProcessor, DataEntity, Context } from '@terascope/job-components'; import { ExecutionConfig, OpConfig } from '@terascope/types'; import ShortUniqueId from 'short-unique-id'; import DataWindow from '../__lib/data-window.js'; diff --git a/asset/src/copy_field/interfaces.ts b/asset/src/copy_field/interfaces.ts new file mode 100644 index 00000000..1eba5bc0 --- /dev/null +++ b/asset/src/copy_field/interfaces.ts @@ -0,0 +1,7 @@ +import { OpConfig } from '@terascope/types'; + +export interface CopyFieldConfig extends OpConfig { + source: string; + destination: string; + delete_source: boolean; +} diff --git a/asset/src/copy_field/processor.ts b/asset/src/copy_field/processor.ts 
index c9e3c087..811cea8d 100644 --- a/asset/src/copy_field/processor.ts +++ b/asset/src/copy_field/processor.ts @@ -1,12 +1,9 @@ -import { - MapProcessor, - OpConfig, - DataEntity, -} from '@terascope/job-components'; +import { MapProcessor, DataEntity } from '@terascope/job-components'; import { get, set } from '@terascope/utils'; +import { CopyFieldConfig } from './interfaces.js'; import DataWindow from '../__lib/data-window.js'; -export default class CopyField extends MapProcessor { +export default class CopyField extends MapProcessor { map(doc: DataEntity): DataEntity { if (doc instanceof DataWindow) { return this.handleDataWindow(doc); diff --git a/asset/src/copy_field/schema.ts b/asset/src/copy_field/schema.ts index 9a55cbfa..cfae6769 100644 --- a/asset/src/copy_field/schema.ts +++ b/asset/src/copy_field/schema.ts @@ -1,7 +1,7 @@ import { ConvictSchema } from '@terascope/job-components'; -import { DateRouterConfig } from '@terascope/standard-asset-apis'; +import { CopyFieldConfig } from './interfaces.js'; -export default class Schema extends ConvictSchema { +export default class Schema extends ConvictSchema { build(): Record { return { source: { diff --git a/asset/src/copy_metadata_field/interfaces.ts b/asset/src/copy_metadata_field/interfaces.ts new file mode 100644 index 00000000..70078788 --- /dev/null +++ b/asset/src/copy_metadata_field/interfaces.ts @@ -0,0 +1,6 @@ +import { OpConfig } from '@terascope/types'; + +export interface CopyMetadataFieldConfig extends OpConfig { + destination: string; + meta_key: string; +} diff --git a/asset/src/copy_metadata_field/processor.ts b/asset/src/copy_metadata_field/processor.ts new file mode 100644 index 00000000..2bea6235 --- /dev/null +++ b/asset/src/copy_metadata_field/processor.ts @@ -0,0 +1,10 @@ +import { MapProcessor, DataEntity } from '@terascope/job-components'; +import { CopyMetadataFieldConfig } from './interfaces.js'; + +// generalize any meta data field retrieval CopyMetadataField +export default class 
CopyMetadataField extends MapProcessor { + map(doc: DataEntity) { + doc[this.opConfig.destination] = doc.getMetadata(this.opConfig.meta_key); + return doc; + } +} diff --git a/asset/src/copy_metadata_field/schema.ts b/asset/src/copy_metadata_field/schema.ts new file mode 100644 index 00000000..03d9e4cd --- /dev/null +++ b/asset/src/copy_metadata_field/schema.ts @@ -0,0 +1,19 @@ +import { ConvictSchema } from '@terascope/job-components'; +import { CopyMetadataFieldConfig } from './interfaces.js'; + +export default class Schema extends ConvictSchema { + build() { + return { + destination: { + doc: 'The property to copy to', + format: 'required_String', + default: null + }, + meta_key: { + doc: 'The Dataentity metadata key to copy', + format: 'required_String', + default: '_key' + } + }; + } +} diff --git a/asset/src/count_by_field/interfaces.ts b/asset/src/count_by_field/interfaces.ts index 4820da43..08b00e2e 100644 --- a/asset/src/count_by_field/interfaces.ts +++ b/asset/src/count_by_field/interfaces.ts @@ -1,9 +1,5 @@ import { OpConfig } from '@terascope/types'; -export interface CountByField { - op_name: string; -} - export interface CountByFieldConfig extends OpConfig { field: string; collect_metrics: boolean; diff --git a/asset/src/count_by_field/processor.ts b/asset/src/count_by_field/processor.ts index 92b7502d..8e770a33 100644 --- a/asset/src/count_by_field/processor.ts +++ b/asset/src/count_by_field/processor.ts @@ -1,6 +1,4 @@ -import { - MapProcessor, DataEntity, isPromAvailable -} from '@terascope/job-components'; +import { MapProcessor, DataEntity, isPromAvailable } from '@terascope/job-components'; import { CountByFieldConfig } from './interfaces.js'; type Counters = { @@ -9,15 +7,19 @@ type Counters = { field: string; }; }; + export default class CountByField extends MapProcessor { static counters: Counters = {}; + async initialize(): Promise { const { opConfig, context } = this; + if (opConfig.collect_metrics && isPromAvailable(context)) { const 
defaultLabels = context.apis.foundation.promMetrics.getDefaultLabels(); const name = `${this.opConfig._op}_count_total`; const help = `${this.opConfig._op} value field count`; const labelNames = [...Object.keys(defaultLabels), 'value', 'field', 'op_name']; + await this.context.apis.foundation.promMetrics.addCounter( name, help, diff --git a/asset/src/count_unique/interfaces.ts b/asset/src/count_unique/interfaces.ts new file mode 100644 index 00000000..e39425d7 --- /dev/null +++ b/asset/src/count_unique/interfaces.ts @@ -0,0 +1,7 @@ +import { OpConfig } from '@terascope/types'; + +export interface CountUniqueConfig extends OpConfig { + preserve_fields: string[]; + field: string; + is_meta_field: boolean; +} diff --git a/asset/src/count_unique/processor.ts b/asset/src/count_unique/processor.ts new file mode 100644 index 00000000..c14d1af7 --- /dev/null +++ b/asset/src/count_unique/processor.ts @@ -0,0 +1,37 @@ +import { BatchProcessor, DataEntity, has } from '@terascope/job-components'; +import { CountUniqueConfig } from './interfaces.js'; + +export default class CountUnique extends BatchProcessor { + async onBatch(dataArray: DataEntity[]) { + const results: Record = {}; + + for (const doc of dataArray) { + const key = this._getIdentifier(doc); + + if (!has(results, key)) { + results[key] = DataEntity.make({ + count: 0, + _key: key + }, { _key: key }); + } + + results[key].count++; + + this.opConfig.preserve_fields.forEach((field) => { + if (doc[field] != null) { + results[key][field] = doc[field]; + } + }); + } + + return Object.values(results); + } + + private _getIdentifier(doc: DataEntity): any { + if (this.opConfig.is_meta_field) { + return doc.getMetadata(this.opConfig.field); + } + + return doc[this.opConfig.field]; + } +} diff --git a/asset/src/count_unique/schema.ts b/asset/src/count_unique/schema.ts new file mode 100644 index 00000000..e7310e2d --- /dev/null +++ b/asset/src/count_unique/schema.ts @@ -0,0 +1,28 @@ +import { ConvictSchema, isString } from 
'@terascope/job-components'; +import { CountUniqueConfig } from './interfaces.js'; + +export default class Schema extends ConvictSchema { + build() { + return { + preserve_fields: { + doc: 'A list of fields whose last seen values are added to the result in addition to the count', + default: [], + format: (input: unknown) => { + if (!Array.isArray(input) || input.some((val) => !isString(val))) { + throw new Error('Parameter "preserve_fields" must be an array of strings'); + } + } + }, + field: { + doc: 'Field that is counted, defaults to metadata _key', + default: '_key', + format: 'required_String' + }, + is_meta_field: { + doc: 'determines if the field to count on lives as a DataEntity meta field or on the record itself', + default: true, + format: Boolean + } + }; + } +} diff --git a/asset/src/data_generator/schema.ts b/asset/src/data_generator/schema.ts index 675b8818..31a1cad1 100644 --- a/asset/src/data_generator/schema.ts +++ b/asset/src/data_generator/schema.ts @@ -1,11 +1,6 @@ import { - ConvictSchema, - ValidatedJobConfig, - getOpConfig, - AnyObject, - isNotNil, - getTypeOf, - isString + ConvictSchema, ValidatedJobConfig, getOpConfig, + AnyObject, isNotNil, getTypeOf, isString } from '@terascope/job-components'; import { DataGenerator, IDType, DateOptions } from './interfaces.js'; diff --git a/asset/src/debug_routes/processor.ts b/asset/src/debug_routes/processor.ts index 724a2b4b..2b645c8d 100644 --- a/asset/src/debug_routes/processor.ts +++ b/asset/src/debug_routes/processor.ts @@ -1,7 +1,4 @@ -import { - BatchProcessor, - DataEntity, -} from '@terascope/job-components'; +import { BatchProcessor, DataEntity } from '@terascope/job-components'; import { inspect } from 'util'; export default class DebugRoutesProcessor extends BatchProcessor { diff --git a/asset/src/filter/interfaces.ts b/asset/src/filter/interfaces.ts new file mode 100644 index 00000000..051d584e --- /dev/null +++ b/asset/src/filter/interfaces.ts @@ -0,0 +1,19 @@ +import { OpConfig } from 
'@terascope/types'; + +export interface ExceptionRule { + field: string; + value: any; + regex?: boolean; +} + +export interface FilterConfig extends OpConfig { + field: string | string []; + value?: any; + invert: boolean; + array_index: number; + filter_by: string; + validation_function?: string; + validation_function_args?: any; + filtered_to_dead_letter_queue: boolean; + exception_rules?: ExceptionRule[]; +} diff --git a/asset/src/filter/processor.ts b/asset/src/filter/processor.ts new file mode 100644 index 00000000..be7905da --- /dev/null +++ b/asset/src/filter/processor.ts @@ -0,0 +1,180 @@ +import { + FilterProcessor, Context, ExecutionConfig, + DataEntity, get, toCamelCase +} from '@terascope/job-components'; +import { FieldValidator } from '@terascope/data-mate'; +import { FilterConfig, ExceptionRule } from './interfaces.js'; + +export default class Filter extends FilterProcessor { + functionName: string; + filterValue: any[]; + exception_rules?: ExceptionRule[]; + + constructor(context: Context, opConfig: FilterConfig, exConfig: ExecutionConfig) { + super(context, opConfig, exConfig); + + this.functionName = toCamelCase(this.opConfig.filter_by); + this.filterValue = this._filterValueToArray(); + + // convert regexps on start up + if (this.opConfig.exception_rules) { + this.exception_rules = this._updateRegexRules(); + } + } + + filter(doc: DataEntity) { + if (this.exception_rules && this._exceptionCheck(doc)) return true; + + const result = this._criteriaCheck(this._getDocValue(doc)); + + const keep = this.opConfig.invert === true ? 
result : !result; + + if (this.opConfig.filtered_to_dead_letter_queue && keep === false) { + const err = new Error('Record was rejected'); + this.rejectRecord(doc, err); + } + + return keep; + } + + _criteriaCheck(docValue: any | any[]) { + if (Array.isArray(docValue)) { + return docValue.some((value) => this._compareValues(value)); + } + + return this._compareValues(docValue); + } + + _getDocValue(doc: DataEntity) { + if (Array.isArray(this.opConfig.field)) { + return this.opConfig.field.map((f) => get(doc, f)); + } + + if (this.functionName === 'size') { + if (this.opConfig.field === 'doc') { + return this._getSize(doc); + } + + return this._getSize(doc[this.opConfig.field]); + } + + if (this.opConfig.array_index > -1) { + if (Array.isArray(doc[this.opConfig.field])) { + return doc[this.opConfig.field][this.opConfig.array_index]; + } + + return undefined; + } + + return get(doc, this.opConfig.field); + } + + _filterValueToArray() { + let value = this.opConfig.value; + + if (this.functionName === 'regex') { + if (Array.isArray(value)) { + return value.map((v: string) => this._toRegex(v)); + } + value = this._toRegex(value); + return [value]; + } + + if (Array.isArray(this.opConfig.value)) { + return value; + } + + return [value]; + } + + _compareValues(docValue: any) { + return this.filterValue.some( + // @ts-expect-error + (filter) => this[this.functionName](docValue, filter) + ); + } + + match(docValue: any, filterValue: any) { + return docValue === filterValue; + } + + regex(docValue: any, filterValue: RegExp) { + return filterValue.test(docValue); + } + + ipRange(docValue: string, filterValue: string) { + return FieldValidator.inIPRange(docValue, {}, { cidr: filterValue }); + } + + validator(docValue: any) { + const fn = get(FieldValidator, this.opConfig.validation_function as string); + return fn(docValue, {}, this.opConfig.validation_function_args); + } + + size(sizeValue: number) { + return sizeValue > this.opConfig.value; + } + + _getSize(doc: any) { + // 
TODO: what about symbol and bigint ?? + const typeSizes = { + undefined: () => 0, + boolean: () => 4, + number: () => 8, + bigint: () => 8, + string: (item: string) => 2 * item.length, + object: (item: Record): number => { + if (item == null) return 0; + + return Object.keys(item).reduce( + (total, key) => total + this._getSize(key) + this._getSize(item[key]), 0 + ); + } + }; + // TODO: what about symbol and bigint ?? + const type = get(typeSizes, typeof doc, () => 0); + return type(doc); + } + + _exceptionCheck(doc: DataEntity) { + return this.exception_rules!.some((rule) => this._runExceptionRule(doc, rule)); + } + + _runExceptionRule(doc: Record, { field, value, regex }: ExceptionRule) { + const checkValue = doc[field]; + + if (checkValue == null) return false; + + if (regex) return this._checkRegex(checkValue, value as RegExp); + + return checkValue === value; + } + + _checkRegex(checkValue: string, value: RegExp) { + return checkValue.match(value) != null; + } + + _updateRegexRules() { + return this.opConfig.exception_rules!.map((rule) => { + const { field, value, regex } = rule; + + if (regex) { + return { field, value: this._toRegex(value as string), regex }; + } + + return rule; + }); + } + + _toRegex(value: string) { + const lastFwdSlash = value.lastIndexOf('/'); + + const flags = value.slice(lastFwdSlash + 1); + + try { + return new RegExp(value.slice(1, lastFwdSlash), flags); + } catch (e) { + throw new Error(`could not convert ${value} to regex, ${e}`); + } + } +} diff --git a/asset/src/filter/schema.ts b/asset/src/filter/schema.ts new file mode 100644 index 00000000..db420c3d --- /dev/null +++ b/asset/src/filter/schema.ts @@ -0,0 +1,106 @@ +import { + ConvictSchema, isString, isBoolean, has +} from '@terascope/job-components'; +import { FieldValidator } from '@terascope/data-mate'; +import { FilterConfig } from './interfaces.js'; + +// TODO: add more checks around grouping, if one set but not another that is paired +export default class Schema extends 
ConvictSchema { + build() { + return { + field: { + doc: 'Field to filter on', + format: fieldCheck, + default: null + }, + value: { + doc: 'Value that is compared with document field value', + format: '*', + default: null + }, + invert: { + doc: 'Set to true to return documents that match filter rules', + format: 'Boolean', + default: false + }, + array_index: { + doc: 'Specify array field index to filter on', + format: 'Number', + default: -1 + }, + filter_by: { + doc: 'Filter function options are: match, regex, ip_range, validator or size', + default: 'match', + format: typeCheck + }, + validation_function: { + doc: 'DataMate validation function to apply to a field', + default: null, + format: validatorFuncCheck, + }, + validation_function_args: { + doc: 'Required Validator function args', + default: null, + format: '*' + }, + filtered_to_dead_letter_queue: { + doc: 'Filtered docs are sent to the kafka dead letter queue', + default: false, + format: (val: unknown) => { + if (!isBoolean(val)) { + throw new Error('Parameter "filtered_to_dead_letter_queue" should be a boolean'); + } + } + }, + exception_rules: { + doc: 'Expects an array of objects, ie: [{ field: FIELD NAME, value: STRING or REGEX, regex: BOOLEAN }]. 
The value property can be a string or a regex, but if it is a regex it must be in format /REGEX/Flags and the regex property should be set to true.', + default: null, + format: (rules: unknown) => { + if (rules == null) return; + if (!Array.isArray(rules)) { + throw new Error(`exception must be an array of objects, got ${rules}`); + } + + for (const rule of rules) { + const { field, value, regex } = rule; + + if (!isString(field) + || value == null + || (regex != null + && !isBoolean(regex) + && !isString(value))) { + throw new Error(`exception properties must be either "field" with a string value or "value" with a non-null value, got ${rules}`); + } + } + } + } + }; + } +} + +function fieldCheck(val: unknown) { + if (Array.isArray(val)) { + if (val.some((v) => !isString(v))) { + throw new Error(`Field must be a string or array of strings, received ${val}`); + } + return; + } else if (!isString(val)) { + throw new Error(`Field must be a string or array of strings, received ${val}`); + } +} + +function typeCheck(val: unknown) { + const filterChoices = ['match', 'regex', 'ip_range', 'validator', 'size']; + + if (val == null || !isString(val) || !filterChoices.includes(val)) { + throw new Error('type must be match, regex, ip_range, validator, or size'); + } +} + +function validatorFuncCheck(val: unknown) { + if (val == null) return; + + if (!isString(val) || !has(FieldValidator, val)) { + throw new Error(`Parameter validation_function was set to "${val}", must be a valid FieldValidator function`); + } +} diff --git a/asset/src/filter_by_date/interfaces.ts b/asset/src/filter_by_date/interfaces.ts new file mode 100644 index 00000000..46188ce1 --- /dev/null +++ b/asset/src/filter_by_date/interfaces.ts @@ -0,0 +1,7 @@ +import { OpConfig } from '@terascope/types'; + +export interface FilterByDateConfig extends OpConfig { + date_field: string; + limit_past: string; + limit_future: string; +} diff --git a/asset/src/filter_by_date/processor.ts 
b/asset/src/filter_by_date/processor.ts new file mode 100644 index 00000000..e0715cdd --- /dev/null +++ b/asset/src/filter_by_date/processor.ts @@ -0,0 +1,80 @@ +import { + FilterProcessor, Context, ExecutionConfig, + DataEntity, isValidDate, getTime, isISO8601 +} from '@terascope/job-components'; +import ms from 'ms'; +import { FilterByDateConfig } from './interfaces.js'; + +enum DateDirection { + past = 'past', + future = 'future' +} + +export default class FilterByDate extends FilterProcessor { + private limit_past: number; + private limit_future: number; + // a date value is a comparison against a static set date, while the + // other (ie 1Day) is a moving date comparison during the life of the job + private is_precise_past_date = false; + private is_precise_future_date = false; + + constructor(context: Context, opConfig: FilterByDateConfig, exConfig: ExecutionConfig) { + super(context, opConfig, exConfig); + + if (isISO8601(this.opConfig.limit_past)) { + this.limit_past = new Date(this.opConfig.limit_past).getTime(); + this.is_precise_past_date = true; + } else { + this.limit_past = ms(this.opConfig.limit_past as string); + } + + if (isISO8601(this.opConfig.limit_future)) { + this.limit_future = new Date(this.opConfig.limit_future).getTime(); + this.is_precise_future_date = true; + } else { + this.limit_future = ms(this.opConfig.limit_future as string); + } + } + + filter(record: DataEntity) { + const now = Date.now(); + const pastGuard = this._getGuardTime(DateDirection.past, now); + const futureGuard = this._getGuardTime(DateDirection.future, now); + + return this._checkDate(record[this.opConfig.date_field], pastGuard, futureGuard); + } + + _getGuardTime(guardDirection: DateDirection, now: number) { + if (guardDirection === DateDirection.past) { + if (this.is_precise_past_date) { + // static past comparison + return this.limit_past; + } + // moving past range comparison + return now - this.limit_past; + } + + if (this.is_precise_future_date) { + // moving 
future range comparison + return this.limit_future; + } + + // static future comparison + return now + this.limit_future; + } + + _checkDate(date: unknown, pastGuard: number, futureGuard: number): boolean { + if (this._validTimestamp(date)) { + const milliDate = getTime(date); + + if (milliDate === false) return false; + + return milliDate >= pastGuard && milliDate <= futureGuard; + } + return false; + } + + _validTimestamp(value: unknown): value is Date { + return isValidDate(value); + } +} diff --git a/asset/src/filter_by_date/schema.ts b/asset/src/filter_by_date/schema.ts new file mode 100644 index 00000000..8a20392a --- /dev/null +++ b/asset/src/filter_by_date/schema.ts @@ -0,0 +1,43 @@ +import { ConvictSchema, isString, isISO8601 } from '@terascope/job-components'; +import ms from 'ms'; +import { FilterByDateConfig } from './interfaces.js'; + +export default class Schema extends ConvictSchema { + _limitsSchema(val: unknown) { + if (!isString(val)) { + throw new Error('Limits must be a string'); + } + + if (isISO8601(val)) { + return new Date(val).getTime(); + } + + const result = ms(val); + if (result == null) { + throw new Error(`Invalid date like value, got ${val}`); + } + + return result; + } + + build() { + return { + date_field: { + doc: 'date field', + format: 'required_String', + default: 'date' + }, + limit_past: { + // date string or 1week syntax, maybe use ms library + doc: 'limit on dates before', + format: this._limitsSchema, + default: '1week' + }, + limit_future: { + doc: 'limit on dates after', + format: this._limitsSchema, + default: '1day' + } + }; + } +} diff --git a/asset/src/filter_by_required_fields/interfaces.ts b/asset/src/filter_by_required_fields/interfaces.ts new file mode 100644 index 00000000..ee49c2de --- /dev/null +++ b/asset/src/filter_by_required_fields/interfaces.ts @@ -0,0 +1,12 @@ +import { OpConfig } from '@terascope/types'; + +export enum LogicType { + AND = 'and', + OR = 'or' +} + +export interface 
FilterByRequiredFieldConfig extends OpConfig { + required_fields: string[]; + filter_type: LogicType; + invert: boolean; +} diff --git a/asset/src/filter_by_required_fields/processor.ts b/asset/src/filter_by_required_fields/processor.ts new file mode 100644 index 00000000..07afc22f --- /dev/null +++ b/asset/src/filter_by_required_fields/processor.ts @@ -0,0 +1,26 @@ +import { FilterProcessor, DataEntity, isNil } from '@terascope/job-components'; +import { FilterByRequiredFieldConfig, LogicType } from './interfaces.js'; + +export default class FilterByRequiredFields extends FilterProcessor { + filter(doc: DataEntity) { + const keep = this._keepDoc(doc); + + if (this.opConfig.invert) { + return !keep; + } + + return keep; + } + + _keepDoc(doc: DataEntity) { + if (this.opConfig.filter_type === LogicType.OR) { + return this.opConfig.required_fields.some((field) => this._validValue(doc[field])); + } + + return this.opConfig.required_fields.every((field) => this._validValue(doc[field])); + } + + _validValue(value: unknown) { + return !isNil(value); + } +} diff --git a/asset/src/filter_by_required_fields/schema.ts b/asset/src/filter_by_required_fields/schema.ts new file mode 100644 index 00000000..a9cc31c0 --- /dev/null +++ b/asset/src/filter_by_required_fields/schema.ts @@ -0,0 +1,42 @@ +import { ConvictSchema, isString } from '@terascope/job-components'; +import { FilterByRequiredFieldConfig } from './interfaces.js'; + +export default class Schema extends ConvictSchema { + build() { + return { + required_fields: { + doc: 'Array of fields that must be present and have a non-null value', + default: [], + format: (val: unknown) => { + if (!Array.isArray(val)) { + throw new Error('Parameter "required_fields" must be an array of strings'); + } + + if (val.length === 0 || !val.every((i) => isString(i))) { + throw new Error('Parameter "required_fields" cannot be empty and must have all string values'); + } + } + }, + filter_type: { + doc: 'AND or OR, if AND then every field is 
required, if OR just one of the fields', + default: 'AND', + format: (value: unknown) => { + if (!isString(value)) { + throw new Error('Parameter "filter_type" must be a string'); + } + + const lowerCase = value.toLowerCase(); + + if (lowerCase !== 'or' && lowerCase !== 'and') { + throw new Error('value must be "OR" or "AND"'); + } + } + }, + invert: { + doc: 'Set to True to Invert the selection and return records with required fields', + default: false, + format: 'Boolean' + } + }; + } +} diff --git a/asset/src/filter_by_unknown_fields/interfaces.ts b/asset/src/filter_by_unknown_fields/interfaces.ts new file mode 100644 index 00000000..7763b20a --- /dev/null +++ b/asset/src/filter_by_unknown_fields/interfaces.ts @@ -0,0 +1,6 @@ +import { OpConfig } from '@terascope/job-components'; + +export interface FilterByUnknownFieldsConfig extends OpConfig { + known_fields: string[]; + invert: boolean; +} diff --git a/asset/src/filter_by_unknown_fields/processor.ts b/asset/src/filter_by_unknown_fields/processor.ts new file mode 100644 index 00000000..c24a08c0 --- /dev/null +++ b/asset/src/filter_by_unknown_fields/processor.ts @@ -0,0 +1,15 @@ +import { FilterProcessor, DataEntity } from '@terascope/job-components'; +import { FilterByUnknownFieldsConfig } from './interfaces.js'; + +export default class FilterIfUnknownFields extends FilterProcessor { + filter(doc: DataEntity) { + const hasUnknownFields = Object.keys(doc) + .filter((field) => !this.opConfig.known_fields.includes(field)).length > 0; + + if (this.opConfig.invert === true) { + return hasUnknownFields; + } + + return !hasUnknownFields; + } +} diff --git a/asset/src/filter_by_unknown_fields/schema.ts b/asset/src/filter_by_unknown_fields/schema.ts new file mode 100644 index 00000000..8994b21a --- /dev/null +++ b/asset/src/filter_by_unknown_fields/schema.ts @@ -0,0 +1,27 @@ +import { ConvictSchema, isString } from '@terascope/job-components'; +import { FilterByUnknownFieldsConfig } from './interfaces.js'; + +export 
default class Schema extends ConvictSchema { + build() { + return { + known_fields: { + doc: 'List of fields that are known to exist on the record', + default: [], + format: (val: unknown) => { + if (!Array.isArray(val)) { + throw new Error('Parameter "known_fields" must be an array of strings'); + } + + if (val.length === 0 || !val.every((i) => isString(i))) { + throw new Error('Parameter "known_fields" cannot be empty and must have all string values'); + } + } + }, + invert: { + doc: 'Set invert to True to return records with unknown fields', + format: 'Boolean', + default: false + } + }; + } +} diff --git a/asset/src/index.ts b/asset/src/index.ts index 18f1cfca..c76d08a8 100644 --- a/asset/src/index.ts +++ b/asset/src/index.ts @@ -13,9 +13,15 @@ import AddShortIdSchema from './add_short_id/schema.js'; import CopyField from './copy_field/processor.js'; import CopyFieldSchema from './copy_field/schema.js'; +import CopyMetadataField from './copy_metadata_field/processor.js'; +import CopyMetadataFieldSchema from './copy_metadata_field/schema.js'; + import CountByField from './count_by_field/processor.js'; import CountByFieldSchema from './count_by_field/schema.js'; +import CountUnique from './count_unique/processor.js'; +import CountUniqueSchema from './count_unique/schema.js'; + import DataGeneratorFetcher from './data_generator/fetcher.js'; import DataGeneratorSchema from './data_generator/schema.js'; import DataGeneratorSlicer from './data_generator/slicer.js'; @@ -44,12 +50,27 @@ import ExtractionSchema from './extraction/schema.js'; import FieldRouter from './field_router/processor.js'; import FieldRouterSchema from './field_router/schema.js'; +import Filter from './filter/processor.js'; +import FilterSchema from './filter/schema.js'; + +import FilterByDate from './filter_by_date/processor.js'; +import FilterByDateSchema from './filter_by_date/schema.js'; + +import FilterByRequiredFields from './filter_by_required_fields/processor.js'; +import 
FilterByRequiredFieldsSchema from './filter_by_required_fields/schema.js'; + +import FilterByUnknownFields from './filter_by_unknown_fields/processor.js'; +import FilterByUnknownFieldsSchema from './filter_by_unknown_fields/schema.js'; + import GroupBy from './group_by/processor.js'; import GroupBySchema from './group_by/schema.js'; import HashRouter from './hash_router/processor.js'; import HashRouterSchema from './hash_router/schema.js'; +import JSONParser from './json_parser/processor.js'; +import JSONParserSchema from './json_parser/schema.js'; + import KeyRouter from './key_router/processor.js'; import KeyRouterSchema from './key_router/schema.js'; @@ -62,18 +83,30 @@ import OutputSchema from './output/schema.js'; import PostProcess from './post_process/processor.js'; import PostProcessSchema from './post_process/schema.js'; +import RemoveEmptyFields from './remove_empty_fields/processor.js'; +import RemoveEmptyFieldsSchema from './remove_empty_fields/schema.js'; + import RemoveKey from './remove_key/processor.js'; import RemoveKeySchema from './remove_key/schema.js'; import RoutedSender from './routed_sender/processor.js'; import RoutedSenderSchema from './routed_sender/schema.js'; +import SampleExact from './sample_exact/processor.js'; +import SampleExactSchema from './sample_exact/schema.js'; + +import SampleRandom from './sample_random/processor.js'; +import SampleRandomSchema from './sample_random/schema.js'; + import Selection from './selection/processor.js'; import SelectionSchema from './selection/schema.js'; import SetField from './set_field/processor.js'; import SetFieldSchema from './set_field/schema.js'; +import SetFieldConditional from './set_field_conditional/processor.js'; +import SetFieldConditionalSchema from './set_field_conditional/schema.js'; + import SetKey from './set_key/processor.js'; import SetKeySchema from './set_key/schema.js'; @@ -110,10 +143,18 @@ export const ASSETS = { Processor: CopyField, Schema: CopyFieldSchema }, + 
copy_metadata_field: { + Processor: CopyMetadataField, + Schema: CopyMetadataFieldSchema + }, count_by_field: { Processor: CountByField, Schema: CountByFieldSchema }, + count_unique: { + Processor: CountUnique, + Schema: CountUniqueSchema + }, data_generator: { Fetcher: DataGeneratorFetcher, Schema: DataGeneratorSchema, @@ -123,6 +164,10 @@ export const ASSETS = { Processor: DataWindowToArray, Schema: DataWindowToArraySchema }, + filter_by_date: { + Processor: FilterByDate, + Schema: FilterByDateSchema + }, date_router: { Processor: DateRouter, Schema: DateRouterSchema @@ -151,6 +196,18 @@ export const ASSETS = { Processor: FieldRouter, Schema: FieldRouterSchema }, + filter: { + Processor: Filter, + Schema: FilterSchema + }, + filter_by_required_fields: { + Processor: FilterByRequiredFields, + Schema: FilterByRequiredFieldsSchema + }, + filter_by_unknown_fields: { + Processor: FilterByUnknownFields, + Schema: FilterByUnknownFieldsSchema + }, group_by: { Processor: GroupBy, Schema: GroupBySchema @@ -159,6 +216,10 @@ export const ASSETS = { Processor: HashRouter, Schema: HashRouterSchema }, + json_parser: { + Processor: JSONParser, + Schema: JSONParserSchema + }, key_router: { Processor: KeyRouter, Schema: KeyRouterSchema @@ -175,6 +236,10 @@ export const ASSETS = { Processor: PostProcess, Schema: PostProcessSchema }, + remove_empty_fields: { + Processor: RemoveEmptyFields, + Schema: RemoveEmptyFieldsSchema + }, remove_key: { Processor: RemoveKey, Schema: RemoveKeySchema @@ -183,6 +248,14 @@ export const ASSETS = { Processor: RoutedSender, Schema: RoutedSenderSchema }, + sample_exact: { + Processor: SampleExact, + Schema: SampleExactSchema + }, + sample_random: { + Processor: SampleRandom, + Schema: SampleRandomSchema + }, selection: { Processor: Selection, Schema: SelectionSchema @@ -191,6 +264,10 @@ export const ASSETS = { Processor: SetField, Schema: SetFieldSchema }, + set_field_conditional: { + Processor: SetFieldConditional, + Schema: SetFieldConditionalSchema 
+ }, set_key: { Processor: SetKey, Schema: SetKeySchema diff --git a/asset/src/json_parser/processor.ts b/asset/src/json_parser/processor.ts new file mode 100644 index 00000000..60d12bb4 --- /dev/null +++ b/asset/src/json_parser/processor.ts @@ -0,0 +1,22 @@ +import { BatchProcessor, DataEntity } from '@terascope/job-components'; + +export default class JSONParser extends BatchProcessor { + // @ts-expect-error TODO: fix this type issue + onBatch(docArray: DataEntity[]) { + return docArray.reduce((parsedDocs, doc) => { + try { + const dataString = Buffer.from(doc.getRawData()).toString('utf8') + .trim(); + + const toJson = JSON.parse(dataString); + + parsedDocs.push(DataEntity.make(toJson, doc.getMetadata())); + // TODO: fix this type issue + } catch (err: any) { + this.rejectRecord(doc.getRawData(), err.message); + } + + return parsedDocs; + }, []); + } +} diff --git a/asset/src/json_parser/schema.ts b/asset/src/json_parser/schema.ts new file mode 100644 index 00000000..0f67eb30 --- /dev/null +++ b/asset/src/json_parser/schema.ts @@ -0,0 +1,15 @@ +import { ConvictSchema, OpConfig } from '@terascope/job-components'; + +// TODO: check check if api name is real and available +export default class Schema extends ConvictSchema { + build() { + return { + // maybe document its an inbuilt setting? 
+ _dead_letter_action: { + doc: 'Dead letter action if the incoming buffer cannot be parsed to JSON, defaults to log', + default: 'log', + value: 'required_String' + } + }; + } +} diff --git a/asset/src/remove_empty_fields/processor.ts b/asset/src/remove_empty_fields/processor.ts new file mode 100644 index 00000000..4f7f64b0 --- /dev/null +++ b/asset/src/remove_empty_fields/processor.ts @@ -0,0 +1,25 @@ +import { MapProcessor, DataEntity, isEmpty } from '@terascope/job-components'; + +export default class RemoveEmptyProperties extends MapProcessor { + map(doc: DataEntity) { + for (const [key, value] of Object.entries(doc)) { + if (this._isEmptyField(value)) delete doc[key]; + } + + return doc; + } + + _isEmptyField(value: unknown) { + if (typeof value === 'boolean' || typeof value === 'number') return false; + + if (value == null) return true; + + // handles string + if (typeof value === 'string') { + return isEmpty(value.trim()); + } + + // object, array + return isEmpty(value); + } +} diff --git a/asset/src/remove_empty_fields/schema.ts b/asset/src/remove_empty_fields/schema.ts new file mode 100644 index 00000000..6554849c --- /dev/null +++ b/asset/src/remove_empty_fields/schema.ts @@ -0,0 +1,7 @@ +import { ConvictSchema, OpConfig } from '@terascope/job-components'; + +export default class Schema extends ConvictSchema { + build() { + return {}; + } +} diff --git a/asset/src/routed_sender/processor.ts b/asset/src/routed_sender/processor.ts index 6b930a69..5157fddf 100644 --- a/asset/src/routed_sender/processor.ts +++ b/asset/src/routed_sender/processor.ts @@ -9,9 +9,7 @@ import { TSError, DataEntity, isEmpty, AnyObject } from '@terascope/utils'; import { RoutedSender } from '@terascope/standard-asset-apis'; -import { - RouteSenderConfig -} from './interfaces.js'; +import { RouteSenderConfig } from './interfaces.js'; type SenderFactoryAPI = APIFactoryRegistry; @@ -78,8 +76,8 @@ export default class RoutedSenderProcessor extends BatchProcessor { await 
this.routedSender.route(batch); - await this.routedSender.send(); + return batch; } } diff --git a/asset/src/sample_exact/interfaces.ts b/asset/src/sample_exact/interfaces.ts new file mode 100644 index 00000000..af853f58 --- /dev/null +++ b/asset/src/sample_exact/interfaces.ts @@ -0,0 +1,5 @@ +import { OpConfig } from '@terascope/types'; + +export interface SampleExactConfig extends OpConfig { + percent_kept: number; +} diff --git a/asset/src/sample_exact/processor.ts b/asset/src/sample_exact/processor.ts new file mode 100644 index 00000000..75dd610a --- /dev/null +++ b/asset/src/sample_exact/processor.ts @@ -0,0 +1,35 @@ +import { + BatchProcessor, Context, DataEntity, ExecutionConfig +} from '@terascope/job-components'; +import { SampleExactConfig } from './interfaces.js'; + +export default class SampleExact extends BatchProcessor { + readonly percentage: number; + + constructor(context: Context, opConfig: SampleExactConfig, exConfig: ExecutionConfig) { + super(context, opConfig, exConfig); + this.percentage = this.opConfig.percent_kept / 100; + } + + async onBatch(dataArray: DataEntity[]) { + this._shuffleArray(dataArray); + const length = Math.floor(dataArray.length * this.percentage); + return dataArray.slice(0, length); + } + + /* + * Randomize array element order in-place. + * Using Durstenfeld shuffle algorithm. 
+ * https://stackoverflow.com/a/12646864 + */ + _shuffleArray(array: DataEntity[]) { + for (let i = array.length - 1; i > 0; i -= 1) { + const j = Math.floor(Math.random() * (i + 1)); + const temp = array[i]; + array[i] = array[j]; + array[j] = temp; + } + + return array; + } +} diff --git a/asset/src/sample_exact/schema.ts b/asset/src/sample_exact/schema.ts new file mode 100644 index 00000000..0961f28b --- /dev/null +++ b/asset/src/sample_exact/schema.ts @@ -0,0 +1,18 @@ +import { ConvictSchema, isNumber } from '@terascope/job-components'; +import { SampleExactConfig } from './interfaces.js'; + +export default class Schema extends ConvictSchema { + build() { + return { + percent_kept: { + doc: 'The percentage of documents to be kept from the input. Must be between 0 and 100. (Default: 100)', + default: 100, + format(val: unknown) { + if (!isNumber(val) || (val < 0) || (val > 100)) { + throw new Error('Percentage must be a number between 0 and 100.'); + } + }, + } + }; + } +} diff --git a/asset/src/sample_random/interfaces.ts b/asset/src/sample_random/interfaces.ts new file mode 100644 index 00000000..2f6eed12 --- /dev/null +++ b/asset/src/sample_random/interfaces.ts @@ -0,0 +1,5 @@ +import { OpConfig } from '@terascope/types'; + +export interface SampleRandomConfig extends OpConfig { + probability_to_keep: number; +} diff --git a/asset/src/sample_random/processor.ts b/asset/src/sample_random/processor.ts new file mode 100644 index 00000000..6f112d8b --- /dev/null +++ b/asset/src/sample_random/processor.ts @@ -0,0 +1,16 @@ +import { BatchProcessor, DataEntity, random } from '@terascope/job-components'; +import { SampleRandomConfig } from './interfaces.js'; + +export default class SampleRandom extends BatchProcessor { + async onBatch(dataArray: DataEntity[]) { + const outData: DataEntity[] = []; + + for (const doc of dataArray) { + if (random(0, 99) <= this.opConfig.probability_to_keep) { + outData.push(doc); + } + } + + return outData; + } +} diff --git 
a/asset/src/sample_random/schema.ts b/asset/src/sample_random/schema.ts new file mode 100644 index 00000000..41226a1c --- /dev/null +++ b/asset/src/sample_random/schema.ts @@ -0,0 +1,18 @@ +import { ConvictSchema, isNumber } from '@terascope/job-components'; +import { SampleRandomConfig } from './interfaces.js'; + +export default class Schema extends ConvictSchema { + build() { + return { + probability_to_keep: { + doc: 'The probability of the record being kept. It iterates through the array and generates a random number between 0 and 100, and if the number <= probability it is kept. Must be between 0 and 100, with 100 keeping all records and 0 rejecting all records. (Default: 100)', + default: 100, + format(val: unknown) { + if (!isNumber(val) || (val < 0) || (val > 100)) { + throw new Error('probability must be a number between 0 and 100.'); + } + }, + } + }; + } +} diff --git a/asset/src/set_field_conditional/interfaces.ts b/asset/src/set_field_conditional/interfaces.ts new file mode 100644 index 00000000..f27fc45c --- /dev/null +++ b/asset/src/set_field_conditional/interfaces.ts @@ -0,0 +1,8 @@ +import { OpConfig } from '@terascope/job-components'; + +export interface SetFieldConditionalConfig extends OpConfig { + conditional_field: string; + conditional_values: any[]; + set_field: string; + value: any; +} diff --git a/asset/src/set_field_conditional/processor.ts b/asset/src/set_field_conditional/processor.ts new file mode 100644 index 00000000..91578395 --- /dev/null +++ b/asset/src/set_field_conditional/processor.ts @@ -0,0 +1,24 @@ +import { + MapProcessor, Context, DataEntity, + ExecutionConfig, set, +} from '@terascope/job-components'; +import { SetFieldConditionalConfig } from './interfaces.js'; + +export default class SetFieldConditional extends MapProcessor { + valuesMap = new Map(); + + constructor(context: Context, opConfig: SetFieldConditionalConfig, exConfig: ExecutionConfig) { + super(context, opConfig, exConfig); + for (const value of 
opConfig.conditional_values) { + this.valuesMap.set(value, value); + } + } + + map(data: DataEntity) { + if (this.valuesMap.has(data[this.opConfig.conditional_field])) { + set(data, this.opConfig.set_field, this.opConfig.value); + } + + return data; + } +} diff --git a/asset/src/set_field_conditional/schema.ts b/asset/src/set_field_conditional/schema.ts new file mode 100644 index 00000000..7e93224b --- /dev/null +++ b/asset/src/set_field_conditional/schema.ts @@ -0,0 +1,33 @@ +import { ConvictSchema } from '@terascope/job-components'; +import { SetFieldConditionalConfig } from './interfaces.js'; + +export default class Schema extends ConvictSchema { + build() { + return { + conditional_field: { + default: '', + doc: 'Name of the field', + format: 'required_String', + }, + conditional_values: { + default: [], + doc: 'Value of the field', + format: (val: unknown) => { + if (!Array.isArray(val)) { + throw new Error('Parameter "conditional_values" must be an array'); + } + } + }, + set_field: { + default: '', + doc: 'Name of the field', + format: 'required_String', + }, + value: { + default: null, + doc: 'Value of the field', + format: '*' + } + }; + } +} diff --git a/docs/operations/copy_metadata_field.md b/docs/operations/copy_metadata_field.md new file mode 100644 index 00000000..14ad090c --- /dev/null +++ b/docs/operations/copy_metadata_field.md @@ -0,0 +1,57 @@ +# copy_metadata_field + +The `copy_metadata_field` processor copies the metadata field value to a destination field for any [DataEntity](https://terascope.github.io/teraslice/docs/packages/utils/api/classes/dataentity) or [DataWindow](../entity/data-window.md). 
+ +## Usage + +### Copy a field value to another field + +Example of a job using the `copy_metadata_field` processor + +```json +{ + "name" : "testing", + "workers" : 1, + "slicers" : 1, + "lifecycle" : "once", + "assets" : [ + "standard" + ], + "operations" : [ + { + "_op": "test-reader" + }, + { + "_op": "copy_metadata_field", + "meta_key": "_key", + "destination": "myField" + } + ] +} + +``` +Example of the data and the expected results + +```javascript +const data = [ + DataEntity.make({ name: 'lilly', otherField: 1 }, { _key: 'a1' }), + DataEntity.make({ name: 'willy', otherField: 2 }, { _key: 'b2' }), + DataEntity.make({ name: 'billy', otherField: 3 }, { _key: 'c3' }), + DataEntity.make({ name: 'dilly', otherField: 4 }, { _key: 'd4' }), +] + +const results = await processor.run(data); + +DataEntity.make({ name: 'lilly', myField: 'a1', otherField: 1 }), +DataEntity.make({ name: 'willy', myField: 'b2', otherField: 2 }), +DataEntity.make({ name: 'billy', myField: 'c3', otherField: 3 }), +DataEntity.make({ name: 'dilly', myField: 'd4', otherField: 4 }), +``` + +## Parameters + +| Configuration | Description | Type | Notes | +| ------------- | ------------------------------------------------------------- | ------ | ---------------------------- | +| _op | Name of operation, it must reflect the exact name of the file | String | required | +| meta_key | Name of metadata field to copy the value from | required, defaults to "_key" | +| destination | Name of field to copy the value to | required, no default | diff --git a/docs/operations/count_unique.md b/docs/operations/count_unique.md new file mode 100644 index 00000000..87db653e --- /dev/null +++ b/docs/operations/count_unique.md @@ -0,0 +1,104 @@ +# count_unique + +The `count_unique` processor returns a list of unique values and how many times it appears within the slice for any [DataEntity](https://terascope.github.io/teraslice/docs/packages/utils/api/classes/dataentity) or [DataWindow](../entity/data-window.md). 
+ +## Usage + +### Count the amount of times a given field is shown + +Example of a job using the `count_unique` processor + +```json +{ + "name" : "testing", + "workers" : 1, + "slicers" : 1, + "lifecycle" : "once", + "assets" : [ + "standard" + ], + "operations" : [ + { + "_op": "test-reader" + }, + { + "_op": "count_unique", + "field": "_key", + "is_meta_field": true + } + ] +} + +``` +Example of the data and the expected results + +```javascript +const data = [ + DataEntity.make({ name: 'lilly', otherField: 1 }, { _key: 1 }), + DataEntity.make({ name: 'willy', otherField: 2 }, { _key: 2 }), + DataEntity.make({ name: 'billy', otherField: 3 }, { _key: 1 }), + DataEntity.make({ name: 'dilly', otherField: 4 }, { _key: 3 }), + DataEntity.make({ name: 'chilly', otherField: 4 }, { _key: 1 }), + DataEntity.make({ name: 'silly', otherField: 4 }, { _key: 1 }), +] + +const results = await processor.run(data); + +DataEntity.make({ count: 4, _key_: 1, }), +DataEntity.make({ count: 1, _key_: 2, }), +DataEntity.make({ count: 1, _key_: 3, }), +``` + +Example of a Job checking against a regular field, and preserving the last seen fields of said record + +```json +{ + "name" : "testing", + "workers" : 1, + "slicers" : 1, + "lifecycle" : "once", + "assets" : [ + "standard" + ], + "operations" : [ + { + "_op": "test-reader" + }, + { + "_op": "count_unique", + "field": "otherField", + "is_meta_field": false, + "preserve_fields": ["name"] + } + ] +} + +``` + +```javascript +const data = [ + DataEntity.make({ name: 'lilly', otherField: 1 }, { _key: 1 }), + DataEntity.make({ name: 'willy', otherField: 2 }, { _key: 2 }), + DataEntity.make({ name: 'billy', otherField: 3 }, { _key: 1 }), + DataEntity.make({ name: 'dilly', otherField: 4 }, { _key: 3 }), + DataEntity.make({ name: 'chilly', otherField: 4 }, { _key: 1 }), + DataEntity.make({ name: 'silly', otherField: 4 }, { _key: 1 }), +] + +const results = await processor.run(data); + +DataEntity.make({ count: 3, name: 'silly', otherField: 
4, }), +DataEntity.make({ count: 1, name: 'billy', otherField: 3, }), +DataEntity.make({ count: 1, name: 'willy', otherField: 2, }), +DataEntity.make({ count: 1, name: 'lilly', otherField: 1, }), + +``` + +## Parameters + +| Configuration | Description | Type | Notes | +| ------------- | ------------------------------------------------------------- | ------ | ---------------------------- | +| _op | Name of operation, it must reflect the exact name of the file | String | required | +| preserve_fields | A list of fields whose last seen values are added to the result in addition to the count | String[] | optional, defaults to an empty array | +| field | field to get count on | String | required, defaults to "_key" | +| is_meta_field | determines if the "field" parameter is a Metadata field or an actual field on the record | Boolean | required, defaults to true since _key is a Metadata field | diff --git a/docs/operations/debug_routes.md b/docs/operations/debug_routes.md new file mode 100644 index 00000000..4052f7f7 --- /dev/null +++ b/docs/operations/debug_routes.md @@ -0,0 +1,54 @@ +# debug_routes + +The `debug_routes` processor helps with debugging and inspecting a slice to see how many records belong to a given route as marked in the metadata key 'standard:route' in [DataEntity](https://terascope.github.io/teraslice/docs/packages/utils/api/classes/dataentity) or [DataWindow](../entity/data-window.md). The key 'standard:route' is used by the routed_sender processor.
+ +## Usage + +### Log the number of unique routes to stdout + +Example of a job using the `debug_routes` processor + +```json +{ + "name" : "testing", + "workers" : 1, + "slicers" : 1, + "lifecycle" : "once", + "assets" : [ + "standard" + ], + "operations" : [ + { + "_op": "test-reader" + }, + { + "_op": "debug_routes", + } + ] +} + +``` +Example of the data and the expected results + +```javascript +const data = [ + DataEntity.make({ id: 1 }, { _key: '1' , 'standard:route': 'a' }), + DataEntity.make({ id: 2 }, { _key: '2' , 'standard:route': 'b' }), + DataEntity.make({ id: 3 }, { _key: '3' , 'standard:route': 'a' }), + DataEntity.make({ id: 4 }, { _key: '4' , 'standard:route': 'c' }), + DataEntity.make({ id: 5 }, { _key: '5' , 'standard:route': 'a' }), + DataEntity.make({ id: 6 }, { _key: '6' , 'standard:route': 'b' }), +] + +const results = await processor.run(data); +/* logs to stdout +* "{ a: 3, b: 2, c: 1 }\n" +*/ +results === data; +``` + +## Parameters + +| Configuration | Description | Type | Notes | +| ------------- | ------------------------------------------------------------- | ------ | ---------------------------- | +| _op | Name of operation, it must reflect the exact name of the file | String | required | diff --git a/docs/operations/drop_field_conditional.md b/docs/operations/drop_field_conditional.md index 96c6dfa8..087a2ccf 100644 --- a/docs/operations/drop_field_conditional.md +++ b/docs/operations/drop_field_conditional.md @@ -1,4 +1,4 @@ -# drop_field +# drop_field_conditional The `drop_field_conditional` processor drops a field from [DataEntities](https://terascope.github.io/teraslice/docs/packages/utils/api/classes/dataentity) or [DataWindows](../entity/data-window.md) based on the specified conditions. The conditions can be either a regex or a [Field Validation](https://terascope.github.io/teraslice/docs/packages/data-mate/overview#Field-Validations) function. 
There is also an `invert` option to drop fields that don't match a regex or don't pass the validation function. Only a regex or a validation can be specified, if both are configured the job will throw an error. @@ -148,4 +148,3 @@ DataEntity.make({ name: 'ron' }), | validation_method | Name of validation method to apply to field value | see [Field Validations](https://terascope.github.io/teraslice/docs/packages/data-mate/overview#field-validations) for list of available functions | | validation_args | some validations require args | optional | | invert | When set to `true`, the processor drops the value if it doesn't match the regex or if it doesn't pass the validation | defaults to `false` | - diff --git a/docs/operations/filter.md b/docs/operations/filter.md new file mode 100644 index 00000000..717b82aa --- /dev/null +++ b/docs/operations/filter.md @@ -0,0 +1,207 @@ +# filter + +Drops docs if the field value meets the criteria provided by filter_by, field, and value. Filter_by field can be a strict match, regex match, or within an ip range using cidr notation. If invert is true then processor returns objects whose value meets the criteria. Criteria value can be a single item or an array of items. 
+ +## Usage + +### Filter out record by value matching + +Example of a job using the `filter` processor + +```json +{ + "name" : "testing", + "workers" : 1, + "slicers" : 1, + "lifecycle" : "once", + "assets" : [ + "standard" + ], + "operations" : [ + { + "_op": "test-reader" + }, + { + "_op": "filter", + "field": "name", + "value": "bob" + } + ] +} + +``` +Example of the data and the expected results + +```javascript +const data = [ + { _key: 0, ip: '28.127.246.12', name: 'francis' }, + { _key: 1, ip: '28.127.246.232', name: 'joseph' }, + { _key: 2, ip: '28.127.246.244', name: 'Johnson' }, + { _key: 3, ip: '4.17.23.6', name: 'bob' }, + { _key: 4, ip: '4.17.14.18', name: 'greg' }, +]; + +const results = await processor.run(data); + +results === [ + { _key: 0, ip: '28.127.246.12', name: 'francis' }, + { _key: 1, ip: '28.127.246.232', name: 'joseph' }, + { _key: 2, ip: '28.127.246.244', name: 'Johnson' }, + { _key: 4, ip: '4.17.14.18', name: 'greg' }, +] +``` + +### Filter by value matching inverted (only keep record that matches) + +Example of a job using the `filter` processor + +```json +{ + "name" : "testing", + "workers" : 1, + "slicers" : 1, + "lifecycle" : "once", + "assets" : [ + "standard" + ], + "operations" : [ + { + "_op": "test-reader" + }, + { + "_op": "filter", + "field": "name", + "value": "bob", + "invert": true + } + ] +} + +``` +Example of the data and the expected results + +```javascript +const data = [ + { _key: 0, ip: '28.127.246.12', name: 'francis' }, + { _key: 1, ip: '28.127.246.232', name: 'joseph' }, + { _key: 2, ip: '28.127.246.244', name: 'Johnson' }, + { _key: 3, ip: '4.17.23.6', name: 'bob' }, + { _key: 4, ip: '4.17.14.18', name: 'greg' }, +]; + +const results = await processor.run(data); + +results === [ + { _key: 3, ip: '4.17.23.6', name: 'bob' }, +] +``` + +### Filter by regex + +Example of a job using the `filter` processor + +```json +{ + "name" : "testing", + "workers" : 1, + "slicers" : 1, + "lifecycle" : "once", + "assets" : [ 
+ "standard" + ], + "operations" : [ + { + "_op": "test-reader" + }, + { + "_op": "filter", + "field": "name", + "value": "/^jo.*/i", + "filter_by": "regex" + } + ] +} + +``` +Example of the data and the expected results + +```javascript +const data = [ + { _key: 0, ip: '28.127.246.12', name: 'francis' }, + { _key: 1, ip: '28.127.246.232', name: 'joseph' }, + { _key: 2, ip: '28.127.246.244', name: 'Johnson' }, + { _key: 3, ip: '4.17.23.6', name: 'bob' }, + { _key: 4, ip: '4.17.14.18', name: 'greg' }, +]; + +const results = await processor.run(data); + +results === [ + { _key: 0, ip: '28.127.246.12', name: 'francis' }, + { _key: 3, ip: '4.17.23.6', name: 'bob' }, + { _key: 4, ip: '4.17.14.18', name: 'greg' }, +] +``` + +### Filter by ip_range + +Example of a job using the `filter` processor + +```json +{ + "name" : "testing", + "workers" : 1, + "slicers" : 1, + "lifecycle" : "once", + "assets" : [ + "standard" + ], + "operations" : [ + { + "_op": "test-reader" + }, + { + "_op": "filter", + "field": "ip", + "value": "28.127.246.0/26", + "filter_by": "ip_range" + } + ] +} + +``` +Example of the data and the expected results + +```javascript +const data = [ + { _key: 0, ip: '28.127.246.12', name: 'francis' }, + { _key: 1, ip: '28.127.246.232', name: 'joseph' }, + { _key: 2, ip: '28.127.246.244', name: 'Johnson' }, + { _key: 3, ip: '4.17.23.6', name: 'bob' }, + { _key: 4, ip: '4.17.14.18', name: 'greg' }, +]; + +const results = await processor.run(data); + +results === [ + { _key: 1, ip: '28.127.246.232', name: 'joseph' }, + { _key: 2, ip: '28.127.246.244', name: 'Johnson' }, + { _key: 3, ip: '4.17.23.6', name: 'bob' }, + { _key: 4, ip: '4.17.14.18', name: 'greg' }, +] +``` + +## Parameters + +| Configuration | Description | Type | Notes | +| ------------- | ------------------------------------------------------------- | ------ | ---------------------------- | +| _op | Name of operation, it must reflect the exact name of the file | String | required | +| field | Field 
to filter on | required, no default | +| value | Value that is compared with document field value | required, no default | +| invert | Set to true to return documents that match filter rules | optional, defaults to `false` | +| array_index | Specify array field index to filter on | optional, defaults to `false` | +| filter_by | Filter function options are: match, regex, ip_range, validator or size | optional, defaults to `match` | +| validation_function | DataMate validation function to apply to a field | optional | +| validation_function_args | Required Validator function args | optional| +| filtered_to_dead_letter_queue | Filtered docs are sent to the kafka dead letter queue | optional, defaults to `false` | +| exception_rules | Expects an array of objects, ie: [{ field: FIELD NAME, value: STRING or REGEX, regex: BOOLEAN }]. The value property can be a string or a regex, but if it is a regex it must be in format /REGEX/Flags and the regex property should be set to true. | optional, defaults to `null` | diff --git a/docs/operations/filter_by_date.md b/docs/operations/filter_by_date.md new file mode 100644 index 00000000..1669fb89 --- /dev/null +++ b/docs/operations/filter_by_date.md @@ -0,0 +1,58 @@ +# filter_by_date + +The `filter_by_date` processor filters records based on if the date value is within a given range + +## Usage + +### Filter records based off of date ranges + +Example of a job using the `filter_by_date` processor + +```json +{ + "name" : "testing", + "workers" : 1, + "slicers" : 1, + "lifecycle" : "once", + "assets" : [ + "standard" + ], + "operations" : [ + { + "_op": "test-reader" + }, + { + "_op": "filter_by_date", + "limit_past": "2week", + "limit_future": "2day" + } + ] +} + +``` +Example of the data and the expected results + +```javascript + +// assuming current date is '2022-11-07T16:43:38.309Z' +const data = [ + DataEntity.make({ id: 1, timestamp: '2022-11-07T12:41:38.009Z' }), + DataEntity.make({ id: 2, timestamp: 
'2022-09-02T12:13:28.823Z' }), + DataEntity.make({ id: 3, timestamp: '2022-11-08T11:24:11.101Z' }), + DataEntity.make({ id: 4, timestamp: '2022-11-11T09:22:54.534Z' }), +] + +const results = await processor.run(data); + +DataEntity.make({ id: 1, timestamp: '2022-11-07T12:41:38.009Z' }), +DataEntity.make({ id: 3, timestamp: '2022-11-08T11:24:11.101Z' }), +``` + +## Parameters + +| Configuration | Description | Type | Notes | +| ------------- | ------------------------------------------------------------- | ------ | ---------------------------- | +| _op | Name of operation, it must reflect the exact name of the file | String | required | +| date_field | The name of the date field to check | String | required, defaults to the "date" field | +| limit_past | The lower date limit a date can be, either an exact date or a duration expression relative to the current date (e.g. '1week') | String | required, defaults to '1week' | +| limit_future | The higher date limit a date can be | String | required, defaults to '1day' | diff --git a/docs/operations/filter_by_required_fields.md b/docs/operations/filter_by_required_fields.md new file mode 100644 index 00000000..89017359 --- /dev/null +++ b/docs/operations/filter_by_required_fields.md @@ -0,0 +1,166 @@ +# filter_by_required_fields + +The `filter_by_required_fields` processor filters records based on whether the required fields are present with non-nullish values for any [DataEntity](https://terascope.github.io/teraslice/docs/packages/utils/api/classes/dataentity) or [DataWindow](../entity/data-window.md).
+ +## Usage + +### Find Records that have all three fields with non-nullish values + +Example of a job using the `filter_by_required_fields` processor + +```json +{ + "name" : "testing", + "workers" : 1, + "slicers" : 1, + "lifecycle" : "once", + "assets" : [ + "standard" + ], + "operations" : [ + { + "_op": "test-reader" + }, + { + "_op": "filter_by_required_fields", + "required_fields": ["age", "name", "size"], + "filter_type": "AND" + } + ] +} + +``` +Example of the data and the expected results + +```javascript + const data = [ + { + age: 20, + name: 'bob1', + size: 10 + }, + { + name: 'bob2', + size: 11 + }, + { + age: 21, + size: 12 + }, + { + age: 22, + name: 'bob3', + }, + { + goop: true + }, + { + age: undefined, + name: 'bob4', + size: 13 + }, + { + age: 23, + name: 'NA', + size: 14 + }, + { + age: 24, + name: 'bob5', + size: '' + }, + { + age: 25, + name: 'bob6', + size: null + }, + { + age: 26, + name: 'bob7', + size: 15 + } +]; + +const results = await processor.run(data); + +results === [ + { age: 20, name: 'bob1', size: 10 }, + { age: 26, name: 'bob7', size: 15 } +] +``` + +### Find Records that neither have + +Example of a job using the `filter_by_required_fields` processor + +```json +{ + "name" : "testing", + "workers" : 1, + "slicers" : 1, + "lifecycle" : "once", + "assets" : [ + "standard" + ], + "operations" : [ + { + "_op": "test-reader" + }, + { + "_op": "filter_by_required_fields", + "required_fields": ["age", "size"], + "filter_type": "OR", + "invert": true + } + ] +} + +``` +Example of the data and the expected results + +```javascript +const data = [ + { age: 20, name: 'bob1', size: 10 }, + { name: 'bob2' }, + { + age: 21, + size: 12 + }, + { + age: 22, + name: 'bob3', + }, + { + goop: true, + name: 'bob', + date: 'sometime' + }, + { + age: 25, + name: 'bob6', + size: null + }, + { + age: null, + name: 'bob7', + size: null + } +]; + +const results = await processor.run(data); + +results === [ + { name: 'bob2' }, + { goop: true, name: 
'bob', date: 'sometime' }, + { age: null, name: 'bob7', size: null } +] +``` + +## Parameters + +| Configuration | Description | Type | Notes | +| ------------- | ------------------------------------------------------------- | ------ | ---------------------------- | +| _op | Name of operation, it must reflect the exact name of the file | String | required | +| required_fields | Array of fields that must be present and have a non-null value | String[] | required, no default | +| filter_type | AND or OR, if AND then every field is required, if OR just one of the fields | String | required, defaults to AND | +| invert | Set to true to invert the selection and return records that are missing the required fields | Boolean | optional, defaults to `false` | diff --git a/docs/operations/filter_by_unknown_fields.md b/docs/operations/filter_by_unknown_fields.md new file mode 100644 index 00000000..9009ea76 --- /dev/null +++ b/docs/operations/filter_by_unknown_fields.md @@ -0,0 +1,103 @@ +# filter_by_unknown_fields + +The `filter_by_unknown_fields` processor filters documents based on whether a record has extra unknown fields.
+ +## Usage + +### Filtering records to only include known fields + +Example of a job using the `filter_by_unknown_fields` processor + +```json +{ + "name" : "testing", + "workers" : 1, + "slicers" : 1, + "lifecycle" : "once", + "assets" : [ + "standard" + ], + "operations" : [ + { + "_op": "test-reader" + }, + { + "_op": "filter_by_unknown_fields", + "known_fields": ["name", "age", "height"] + } + ] +} + +``` +Example of the data and the expected results + +```javascript +const data = [ + { name: 'joe', age: 32, height: 100 }, + { name: 'mel', age: 20, height: 200 }, + { name: 'tim', age: 33, height: 150, year: 2022 }, + { name: 'red', age: 38, height: 120 }, + { name: 'frey', age: 48, height: 125 } +]; + +const results = await processor.run(data); + +results === [ + { name: 'joe', age: 32, height: 100 }, + { name: 'mel', age: 20, height: 200 }, + { name: 'red', age: 38, height: 120 }, + { name: 'frey', age: 48, height: 125 } +]; +``` + +### Filtering records to find those with unknown fields + +Example of a job using the `filter_by_unknown_fields` processor + +```json +{ + "name" : "testing", + "workers" : 1, + "slicers" : 1, + "lifecycle" : "once", + "assets" : [ + "standard" + ], + "operations" : [ + { + "_op": "test-reader" + }, + { + "_op": "filter_by_unknown_fields", + "known_fields": ["name", "age", "height"], + "invert": true + } + ] +} + +``` +Example of the data and the expected results + +```javascript +const data = [ + { name: 'joe', age: 32, height: 100 }, + { name: 'mel', age: 20, height: 200 }, + { name: 'tim', age: 33, height: 150, year: 2022 }, + { name: 'red', age: 38, height: 120 }, + { name: 'frey', age: 48, height: 125 } +]; + +const results = await processor.run(data); + +results === [ + { name: 'tim', age: 33, height: 150, year: 2022 }, +]; +``` + +## Parameters + +| Configuration | Description | Type | 
---------------------------- | +| _op | Name of operation, it must reflect the exact name of the file | String | required | +| known_fields | A list of known fields on the record | required, no default | +| invert | Set invert to True to return records with unknown fields | optional, defaults to false | diff --git a/docs/operations/json_parser.md b/docs/operations/json_parser.md new file mode 100644 index 00000000..41539d58 --- /dev/null +++ b/docs/operations/json_parser.md @@ -0,0 +1,61 @@ +# json_parser + +The `json_parser` processor attempts to transform the buffer data to json + Uses the _dead_letter_queue options to handle parsing errors which are none (ignore), log, throw or sends bad docs to a kafka topic specified in the api property of the job. + see https://terascope.github.io/teraslice/docs/jobs/dead-letter-queue#docsNav + and https://github.com/terascope/kafka-assets/blob/master/docs/apis/kafka_dead_letter.md for dead letter queue details +## Usage + +### JSON Parse raw records + +Example of a job using the `json_parser` processor + +```json +{ + "name" : "testing", + "workers" : 1, + "slicers" : 1, + "lifecycle" : "once", + "assets" : [ + "standard" + ], + "operations" : [ + { + "_op": "test-reader" + }, + { + "_op": "json_parser", + "source": "name", + "destination": "name_again" + } + ] +} + +``` +Example of the data and the expected results + +```javascript +const data = [ + DataEntity.make({}, { _key: '1' }), + DataEntity.make({}, { _key: '2' }), + DataEntity.make({}, { _key: '3' }), +]; + +data[0].setRawData(Buffer.from(JSON.stringify({ id: 1 }), 'utf-8')); +data[1].setRawData(Buffer.from(JSON.stringify({ id: 2 }), 'utf-8')); +data[2].setRawData(Buffer.from(JSON.stringify({ id: 3 }), 'utf-8')); + +const results = await processor.run(data); +[ + DataEntity.make({ id: 1 }); + DataEntity.make({ id: 2 }); + DataEntity.make({ id: 3 }); +] +``` + +## Parameters + +| Configuration | Description | Type | Notes | +| ------------- | 
------------------------------------------------------------- | ------ | ---------------------------- | +| _op | Name of operation, it must reflect the exact name of the file | String | required | +| _dead_letter_action | action to take if a doc can not be transformed to JSON; accepts none, throw, log, or an api name | String | required, defaults to 'log' | diff --git a/docs/operations/remove_empty_fields.md b/docs/operations/remove_empty_fields.md new file mode 100644 index 00000000..9b6d3e9f --- /dev/null +++ b/docs/operations/remove_empty_fields.md @@ -0,0 +1,98 @@ +# remove_empty_fields + +The `remove_empty_fields` processor removes any fields that are considered empty; strings filled with whitespace are considered empty as well. + +## Usage + +### Remove empty fields + +Example of a job using the `remove_empty_fields` processor + +```json +{ + "name" : "testing", + "workers" : 1, + "slicers" : 1, + "lifecycle" : "once", + "assets" : [ + "standard" + ], + "operations" : [ + { + "_op": "test-reader" + }, + { + "_op": "remove_empty_fields" + } + ] +} + +``` +Example of the data and the expected results + +```javascript +const data = [ + DataEntity.make({ + id: 1, + name: 'joe', + age: 102.875 + }), + DataEntity.make({ + id: 2, + name: '', + age: 23, + happy: true, + field: [], + field2: {}, + field3: undefined, + field4: null, + field5: 'UNDEFINED' + }), + DataEntity.make({ + id: 3, + name: 'bob', + age: '', + happy: false, + field7: ['thing1', 'thing2'], + field8: { foo: 'bar' } + }), + DataEntity.make({ + id: 4, + name: ' ', + age: '', + size: '' + }), +] + +const results = await processor.run(data); + +[ + DataEntity.make({ + id: 1, + name: 'joe', + age: 102.875 + }), + DataEntity.make({ + id: 2, + age: 23, + happy: true, + field5: 'UNDEFINED' + }), + DataEntity.make({ + id: 3, + name: 'bob', + happy: false, + field7: ['thing1', 'thing2'], + field8: { foo: 'bar' } + }), + DataEntity.make({ + id: 4, + }), +] +``` + +## Parameters + +| Configuration | Description | Type | 
Notes | +| ------------- | ------------------------------------------------------------- | ------ | ---------------------------- | +| _op | Name of operation, it must reflect the exact name of the file | String | required | diff --git a/docs/operations/sample_exact.md b/docs/operations/sample_exact.md new file mode 100644 index 00000000..7a414c3a --- /dev/null +++ b/docs/operations/sample_exact.md @@ -0,0 +1,55 @@ +# sample_exact + +given an array of JSON documents will return an array containing a shuffled subset of those input documents. The size of the subset will be the percentage multiplied against the length of the array rounded down. + +## Usage + +### Reduce and shuffle the returned array + +Example of a job using the `sample_exact` processor + +```json +{ + "name" : "testing", + "workers" : 1, + "slicers" : 1, + "lifecycle" : "once", + "assets" : [ + "standard" + ], + "operations" : [ + { + "_op": "test-reader" + }, + { + "_op": "sample_exact", + "percent_kept": "50", + } + ] +} + +``` +Example of the data and the expected results + +```javascript +const data = [ + DataEntity.make({ name: 'lilly', otherField: 1 }), + DataEntity.make({ name: 'willy', otherField: 2 }), + DataEntity.make({ name: 'billy', otherField: 3 }), + DataEntity.make({ name: 'dilly', otherField: 4 }), +] + +const results = await processor.run(data); + +results === [ + { name: 'dilly', name_again: 'dilly', otherField: 4 }, + { name: 'willy', name_again: 'willy', otherField: 2 }, +] +``` + +## Parameters + +| Configuration | Description | Type | Notes | +| ------------- | ------------------------------------------------------------- | ------ | ---------------------------- | +| _op | Name of operation, it must reflect the exact name of the file | String | required | +| percent_kept | The percentage of documents to be kept from the input. 
Must be between 0 and 100 | required, defaults to 100 | diff --git a/docs/operations/sample_random.md b/docs/operations/sample_random.md new file mode 100644 index 00000000..b0c423b4 --- /dev/null +++ b/docs/operations/sample_random.md @@ -0,0 +1,55 @@ +# sample_random + +given an array of JSON documents will return an array containing a subset of those input documents. It iterates through the array and generates a random number between 0 and 100 for each record, and if the number <= probability it is kept. Must be between 0 and 100, with 100 keeping all records and 0 rejecting all records. + +## Usage + +### Reduce the returned array + +Example of a job using the `sample_random` processor + +```json +{ + "name" : "testing", + "workers" : 1, + "slicers" : 1, + "lifecycle" : "once", + "assets" : [ + "standard" + ], + "operations" : [ + { + "_op": "test-reader" + }, + { + "_op": "sample_random", + "percent_kept": "50", + } + ] +} + +``` +Example of the data and the expected results + +```javascript +const data = [ + DataEntity.make({ name: 'lilly', otherField: 1 }), + DataEntity.make({ name: 'willy', otherField: 2 }), + DataEntity.make({ name: 'billy', otherField: 3 }), + DataEntity.make({ name: 'dilly', otherField: 4 }), +] + +const results = await processor.run(data); + +results === [ + { name: 'lilly', otherField: 1 }, + { name: 'billy', otherField: 3 }, +] +``` + +## Parameters + +| Configuration | Description | Type | Notes | +| ------------- | ------------------------------------------------------------- | ------ | ---------------------------- | +| _op | Name of operation, it must reflect the exact name of the file | String | required | +| probability_to_keep | The probability of the record being kept. It iterates through the array and generates a random number between 0 and 100, and if the number <= probability it is kept. 
Must be between 0 and 100, with 100 keeping all records and 0 rejecting all records | required, defaults to 100 | diff --git a/docs/operations/set_field.md b/docs/operations/set_field.md index 540850a1..642214bc 100644 --- a/docs/operations/set_field.md +++ b/docs/operations/set_field.md @@ -1,4 +1,4 @@ -# set_key +# set_field The `set_field` processor sets the value for a field in any [DataEntity](https://terascope.github.io/teraslice/docs/packages/utils/api/classes/dataentity) or [DataWindow](../entity/data-window.md). If the field already exists on a record the default behavior to is not overwrite it, but there is an option to overwrite the field value even if it exists. @@ -44,7 +44,7 @@ results === [ { name: 'milly', 'some_field': 2 } { name: 'willy', 'some_field': 2 } { name: 'billy', 'some_field': 2 } - { name: 'dilly', 'some_field': 2 } + { name: 'dilly', 'some_field': 2 } ] ``` diff --git a/docs/operations/set_field_conditional.md b/docs/operations/set_field_conditional.md new file mode 100644 index 00000000..960d0d1c --- /dev/null +++ b/docs/operations/set_field_conditional.md @@ -0,0 +1,68 @@ +# set_field_conditional + +The `set_field_conditional` processor sets the value for a field in any [DataEntity](https://terascope.github.io/teraslice/docs/packages/utils/api/classes/dataentity) or [DataWindow](../entity/data-window.md). If the field already exists on a record the default behavior to is not overwrite it, but there is an option to overwrite the field value even if it exists. 
+ +## Usage + +### Example Job + +```json +{ + "name" : "testing", + "workers" : 1, + "slicers" : 1, + "lifecycle" : "once", + "assets" : [ + "standard" + ], + "operations" : [ + { + "_op": "test-reader" + }, + { + "_op": "set_field_conditional", + "conditional_field": "type", + "conditional_values": ["data1", "data2"], + "set_field": "test_prop", + "value": true + } + ] +} + +``` +Example of the data and the expected results + +```javascript +const data = [ + DataEntity.make({ + id: 1, + test_prop: 'value' + }), + DataEntity.make({ + id: 2, + type: 'data2' + }), + DataEntity.make({ + id: 3, + type: 'data3' + }), +] + +const results = await processor.run(data); + +results === [ + { id: 1, type: 'data1', test_prop: true }, + { id: 2, type: 'data2', test_prop: true }, + { id: 3, type: 'data3' } +] +``` + +## Parameters + +| Configuration | Description | Type | Notes | +| ------------- | ------------------------------------------------------------- | ------ | ---------------------------- | +| _op | Name of operation, it must reflect the exact name of the file | String | required | +| conditional_field | Field name to run checks on | String | required | +| conditional_values | Values to check for given Field | Any[] | required | +| set_field | Name of the field to set | String | required | +| value | Value to set field to | Any | required | diff --git a/jest.config.js b/jest.config.js index 9339aa67..bfd6e86e 100644 --- a/jest.config.js +++ b/jest.config.js @@ -2,6 +2,7 @@ import path from 'node:path'; import { fileURLToPath } from 'node:url'; const dirname = path.dirname(fileURLToPath(import.meta.url)); + export default { verbose: true, testEnvironment: 'node', diff --git a/package.json b/package.json index f858d59d..c8ddce07 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "standard-assets-bundle", "displayName": "Standard Assets Bundle", - "version": "1.1.0", + "version": "1.2.0", "private": true, "description": "Teraslice standard processor 
asset bundle", "type": "module", @@ -29,23 +29,23 @@ "test:watch": "ts-scripts test --watch asset --" }, "devDependencies": { - "@terascope/eslint-config": "^1.1.0", - "@terascope/job-components": "^1.5.1", - "@terascope/scripts": "^1.4.2", - "@terascope/standard-asset-apis": "^1.0.2", - "@types/express": "^4.17.19", - "@types/jest": "^29.5.14", - "@types/json2csv": "^5.0.7", - "@types/node": "^22.7.4", - "@types/node-gzip": "^1.1.0", - "@types/timsort": "^0.3.0", - "eslint": "^9.14.0", - "jest": "^29.7.0", - "jest-extended": "^4.0.2", - "node-notifier": "^10.0.1", - "teraslice-test-harness": "^1.2.0", - "ts-jest": "^29.2.5", - "tslib": "^2.8.1", + "@terascope/eslint-config": "~1.1.0", + "@terascope/job-components": "~1.6.0", + "@terascope/scripts": "~1.5.0", + "@terascope/standard-asset-apis": "~1.0.2", + "@types/express": "~4.17.19", + "@types/jest": "~29.5.14", + "@types/json2csv": "~5.0.7", + "@types/node": "~22.9.0", + "@types/node-gzip": "~1.1.0", + "@types/timsort": "~0.3.0", + "eslint": "~9.14.0", + "jest": "~29.7.0", + "jest-extended": "~4.0.2", + "node-notifier": "~10.0.1", + "teraslice-test-harness": "~1.2.0", + "ts-jest": "~29.2.5", + "tslib": "~2.8.1", "typescript": "~5.2.2" }, "engines": { diff --git a/packages/standard-asset-apis/package.json b/packages/standard-asset-apis/package.json index fc5f5716..57f9c39c 100644 --- a/packages/standard-asset-apis/package.json +++ b/packages/standard-asset-apis/package.json @@ -21,16 +21,17 @@ "test:watch": "ts-scripts test --watch . 
--" }, "dependencies": { - "@sindresorhus/fnv1a": "^2.0.1", - "@terascope/utils": "^1.3.2" + "@sindresorhus/fnv1a": "~3.1.0", + "@terascope/utils": "~1.4.0" }, "devDependencies": { - "@terascope/scripts": "^1.4.2", - "@types/jest": "^29.5.14", - "jest": "^29.7.0", - "jest-extended": "^4.0.2", - "jest-fixtures": "^0.6.0", - "ts-jest": "^29.2.5" + "@terascope/scripts": "~1.5.0", + "@types/jest": "~29.5.14", + "@types/node": "~22.9.0", + "jest": "~29.7.0", + "jest-extended": "~4.0.2", + "jest-fixtures": "~0.6.0", + "ts-jest": "~29.2.5" }, "engines": { "node": ">=18.0.0", diff --git a/packages/standard-asset-apis/src/RoutedSender.ts b/packages/standard-asset-apis/src/RoutedSender.ts index 6534030c..8d359f0a 100644 --- a/packages/standard-asset-apis/src/RoutedSender.ts +++ b/packages/standard-asset-apis/src/RoutedSender.ts @@ -1,5 +1,6 @@ import { - DataEntity, pMap, getLast, isInteger, RouteSenderAPI, Logger + DataEntity, pMap, getLast, + isInteger, RouteSenderAPI, Logger } from '@terascope/utils'; import EventEmitter, { once } from 'node:events'; diff --git a/packages/standard-asset-apis/src/routers/HashRouter.ts b/packages/standard-asset-apis/src/routers/HashRouter.ts index d7023ae8..70c15815 100644 --- a/packages/standard-asset-apis/src/routers/HashRouter.ts +++ b/packages/standard-asset-apis/src/routers/HashRouter.ts @@ -33,7 +33,7 @@ export class HashRouter implements I.Router { } lookup(record: DataEntity): string { - const bucket = fnv1a(this.getHash(record)) % this.partitions; + const bucket = Number(fnv1a(this.getHash(record))) % this.partitions; return bucket.toString(); } } diff --git a/test/copy_metadata_field/processor-spec.ts b/test/copy_metadata_field/processor-spec.ts new file mode 100644 index 00000000..969cca93 --- /dev/null +++ b/test/copy_metadata_field/processor-spec.ts @@ -0,0 +1,77 @@ +import { DataEntity } from '@terascope/utils'; +import { WorkerTestHarness } from 'teraslice-test-harness'; +import { CopyMetadataFieldConfig } from 
'../../asset/src/copy_metadata_field/interfaces.js'; + +describe('copy_metadata_field', () => { + let harness: WorkerTestHarness; + let data: DataEntity[]; + + async function makeTest(config: Partial = {}) { + const baseConfig = { + _op: 'copy_metadata_field', + destination: '_key' + }; + const opConfig = Object.assign({}, baseConfig, config); + harness = WorkerTestHarness.testProcessor(opConfig); + + await harness.initialize(); + + return harness; + } + + beforeEach(() => { + data = [ + DataEntity.make( + { + name: 'chilly', + age: 24 + }, + { + _key: 'qwerty', + } + ), + DataEntity.make( + { + name: 'willy', + age: 225 + }, + { + _key: 'asdfgh', + } + ) + ]; + }); + + afterEach(async () => { + if (harness) await harness.shutdown(); + }); + + it('should generate an empty result if no input data', async () => { + harness = await makeTest(); + const results = await harness.runSlice([]); + + expect(results.length).toBe(0); + }); + + it('by default, it should copy the metadata _key to the _key property on the doc', async () => { + const destination = 'testField'; + harness = await makeTest({ destination }); + const results = await harness.runSlice(data); + + results.forEach((result) => { + expect(result[destination]).toEqual(result.getMetadata('_key')); + }); + }); + + it('should copy any metadata key specified', async () => { + const destination = 'testTime'; + const meta_key = '_createTime'; + + harness = await makeTest({ destination, meta_key }); + const results = await harness.runSlice(data); + + results.forEach((result) => { + expect(result[destination]).toBeNumber(); + }); + }); +}); diff --git a/test/copy_metadata_field/schema-spec.ts b/test/copy_metadata_field/schema-spec.ts new file mode 100644 index 00000000..285324ee --- /dev/null +++ b/test/copy_metadata_field/schema-spec.ts @@ -0,0 +1,35 @@ +import { WorkerTestHarness } from 'teraslice-test-harness'; +import { OpConfig } from '@terascope/job-components'; + +describe('copy_metadata_field schema', () => { + 
let harness: WorkerTestHarness; + const name = 'copy_metadata_field'; + + async function makeSchema(config: Record = {}): Promise { + const opConfig = Object.assign({}, { _op: name }, config); + harness = WorkerTestHarness.testProcessor(opConfig); + + await harness.initialize(); + + const validConfig = harness.executionContext.config.operations.find( + (testConfig) => testConfig._op === name + ); + + return validConfig as OpConfig; + } + + afterEach(async () => { + if (harness) await harness.shutdown(); + }); + + it('should expect to be properly configured', async () => { + await expect(makeSchema({})).toReject(); + await expect(makeSchema({ destination: ['some stuff'] })).toReject(); + await expect(makeSchema({ destination: 'some_destination', meta_key: true })).toReject(); + await expect(makeSchema({ destination: true, source: 'field' })).toReject(); + await expect(makeSchema({ destination: 'true', meta_key: 1234 })).toReject(); + + await expect(makeSchema({ destination: 'someField' })).toResolve(); + await expect(makeSchema({ destination: 'someField', meta_key: 'some_key' })).toResolve(); + }); +}); diff --git a/test/count_by_field/processor-spec.ts b/test/count_by_field/processor-spec.ts index f21c5cfe..1d5e722d 100644 --- a/test/count_by_field/processor-spec.ts +++ b/test/count_by_field/processor-spec.ts @@ -76,7 +76,10 @@ describe('count_by_field processor', () => { job_prom_metrics_port: testConfig.job_prom_metrics_port, job_prom_metrics_add_default: testConfig.job_prom_metrics_add_default, prom_metrics_display_url: - harness.context.sysconfig.terafoundation.prom_metrics_display_url + harness.context.sysconfig.terafoundation.prom_metrics_display_url, + labels: { + assignment: 'worker' + } }); await harness.initialize(); diff --git a/test/count_unique/processor-spec.ts b/test/count_unique/processor-spec.ts new file mode 100644 index 00000000..0a4ab24a --- /dev/null +++ b/test/count_unique/processor-spec.ts @@ -0,0 +1,117 @@ +import { DataEntity } from 
'@terascope/utils'; +import { WorkerTestHarness } from 'teraslice-test-harness'; +import { CountUniqueConfig } from '../../asset/src/count_unique/interfaces.js'; + +describe('count_unique', () => { + let harness: WorkerTestHarness; + + const data = [ + { + id: 1, + name: 'joe' + }, + { + id: 2, + type: 'string', + name: 'joe' + }, + { + id: 1, + name: 'frank' + }, + { + id: 3, + name: 'frank.bob' + }, + { + id: 1, + name: 'frank' + }, + { + id: 1, + name: 'joe' + }, + ]; + + function convertToDE(docArray: Record[], keyField: string) { + return docArray.map((doc) => DataEntity.make(doc, { _key: doc[keyField] })); + } + + async function makeTest(config: Partial = {}) { + const baseConfig = { + _op: 'count_unique', + preserve_fields: ['type'] + }; + const opConfig = Object.assign({}, baseConfig, config); + harness = WorkerTestHarness.testProcessor(opConfig); + + await harness.initialize(); + + return harness; + } + + afterEach(async () => { + if (harness) await harness.shutdown(); + }); + + it('generate an empty result if no input data', async () => { + harness = await makeTest(); + const results = await harness.runSlice([]); + + expect(results.length).toBe(0); + }); + + it('verify correct counts', async () => { + const testData = convertToDE(data, 'id'); + harness = await makeTest(); + + const results = await harness.runSlice(testData); + + expect(results).toEqual([ + { count: 4, _key: 1 }, + { count: 1, _key: 2, type: 'string' }, + { count: 1, _key: 3 } + ]); + }); + + it('preserve field if non null value', async () => { + const test2 = [ + { + _key: 1, + type: 0, + }, + { + _key: 2, + type: 1 + }, + { + _key: 3, + type: false + } + ]; + + const testData = convertToDE(test2, '_key'); + harness = await makeTest(); + + const results = await harness.runSlice(testData); + + expect(results).toEqual([ + { count: 1, _key: 1, type: 0 }, + { count: 1, _key: 2, type: 1 }, + { count: 1, _key: 3, type: false } + ]); + }); + + it('verify correct counts when using a non-key 
field', async () => { + const testData = convertToDE(data, 'id'); + harness = await makeTest({ field: 'name', is_meta_field: false }); + + const results = await harness.runSlice(testData); + + expect(results).toMatchObject([ + { count: 3, _key: 'joe' }, + { count: 2, _key: 'frank' }, + { count: 1, _key: 'frank.bob' } + ]); + }); +}); diff --git a/test/count_unique/schema-spec.ts b/test/count_unique/schema-spec.ts new file mode 100644 index 00000000..4d3e30ae --- /dev/null +++ b/test/count_unique/schema-spec.ts @@ -0,0 +1,35 @@ +import 'jest-extended'; +import { WorkerTestHarness } from 'teraslice-test-harness'; +import { OpConfig } from '@terascope/job-components'; + +describe('count_unique schema', () => { + let harness: WorkerTestHarness; + const name = 'count_unique'; + + async function makeSchema(config: Record = {}): Promise { + const opConfig = Object.assign({}, { _op: name }, config); + harness = WorkerTestHarness.testProcessor(opConfig); + + await harness.initialize(); + + const validConfig = harness.executionContext.config.operations.find( + (testConfig) => testConfig._op === name + ); + + return validConfig as OpConfig; + } + + afterEach(async () => { + if (harness) await harness.shutdown(); + }); + + it('should expect to be properly configured', async () => { + await expect(makeSchema({ preserve_fields: 1234 })).toReject(); + await expect(makeSchema({ field: ['some stuff'] })).toReject(); + await expect(makeSchema({ field: true, preserve_fields: 1234 })).toReject(); + + await expect(makeSchema({ preserve_fields: ['someField'] })).toResolve(); + await expect(makeSchema({ field: 'someField' })).toResolve(); + await expect(makeSchema({ field: 'someField', preserve_fields: ['someField', 'otherField'] })).toResolve(); + }); +}); diff --git a/test/debug_routes/processor-spec.ts b/test/debug_routes/processor-spec.ts new file mode 100644 index 00000000..dc751ee9 --- /dev/null +++ b/test/debug_routes/processor-spec.ts @@ -0,0 +1,54 @@ +import { jest } from 
'@jest/globals'; +import { WorkerTestHarness } from 'teraslice-test-harness'; +import { DataEntity } from '@terascope/job-components'; + +describe('debug_routes', () => { + let harness: WorkerTestHarness; + let spyOnStdout: jest.SpiedFunction; + + beforeEach(() => { + // @ts-expect-error + spyOnStdout = jest.spyOn(process.stdout, 'write').mockImplementation(() => {}); + }); + + async function makeTest() { + const baseConfig = { + _op: 'debug_routes', + }; + const opConfig = Object.assign({}, baseConfig); + harness = WorkerTestHarness.testProcessor(opConfig); + + await harness.initialize(); + + return harness; + } + + afterEach(async () => { + if (harness) { + await harness.shutdown(); + } + spyOnStdout.mockRestore(); + }); + + it('should write to stdout and return all records', async () => { + const data = [ + DataEntity.make({ id: 1 }, { _key: '1', 'standard:route': 'a' }), + DataEntity.make({ id: 2 }, { _key: '2', 'standard:route': 'b' }), + DataEntity.make({ id: 3 }, { _key: '3', 'standard:route': 'a' }), + DataEntity.make({ id: 4 }, { _key: '4', 'standard:route': 'c' }), + DataEntity.make({ id: 5 }, { _key: '5', 'standard:route': 'a' }), + DataEntity.make({ id: 6 }, { _key: '6', 'standard:route': 'b' }), + ]; + + harness = await makeTest(); + const results = await harness.runSlice(data); + + expect(results.length).toEqual(data.length); + + results.forEach((result, index) => { + expect(result).toMatchObject(data[index]); + }); + + expect(spyOnStdout.mock.calls[0]).toEqual(['{ a: 3, b: 2, c: 1 }\n']); + }); +}); diff --git a/test/filter/processor-spec.ts b/test/filter/processor-spec.ts new file mode 100644 index 00000000..1183f996 --- /dev/null +++ b/test/filter/processor-spec.ts @@ -0,0 +1,818 @@ +import { WorkerTestHarness } from 'teraslice-test-harness'; +import { cloneDeep } from '@terascope/job-components'; +import { FilterConfig } from '../../asset/src/filter/interfaces.js'; + +const incoming = [ + { _key: 0, ip: '28.127.246.12', name: 'francis' }, + { 
_key: 1, ip: '28.127.246.232', name: 'joseph' }, + { _key: 2, ip: '28.127.246.244', name: 'Johnson' }, + { _key: 3, ip: '4.17.23.6', name: 'bob' }, + { _key: 4, ip: '4.17.14.18', name: 'greg' }, +]; + +describe('filter', () => { + let harness: WorkerTestHarness; + let testData: any[]; + + async function makeTest(config: Partial = {}) { + const baseConfig = { + _op: 'filter', + }; + const opConfig = Object.assign({}, baseConfig, config); + harness = WorkerTestHarness.testProcessor(opConfig); + + await harness.initialize(); + + return harness; + } + + beforeEach(() => { + testData = cloneDeep(incoming); + }); + + afterEach(async () => { + if (harness) await harness.shutdown(); + }); + + it('should return empty array if input is an empty array', async () => { + harness = await makeTest({ + field: 'test', + value: 'test', + filter_by: 'match' + }); + const results = await harness.runSlice([]); + + expect(results.length).toBe(0); + }); + + it('should return docs without matching value', async () => { + harness = await makeTest({ + field: 'ip', + value: '28.127.246.232', + filter_by: 'match' + }); + const results = await harness.runSlice(testData); + + expect(results.length).toEqual(4); + }); + + it('should only return doc with matching field', async () => { + harness = await makeTest({ + field: 'ip', + value: '28.127.246.232', + filter_by: 'match', + invert: true + }); + const results = await harness.runSlice(testData); + + expect(results.length).toBe(1); + }); + + it('should return no docs if none match and invert is true', async () => { + const testDocs = testData; + testDocs[1].ip = '8.8.8.8'; + + harness = await makeTest({ + field: 'ip', + value: '28.127.246.232', + filter_by: 'match', + invert: true + }); + const results = await harness.runSlice(testDocs); + + expect(results.length).toBe(0); + }); + + it('should filter docs with value that match regex', async () => { + harness = await makeTest({ + field: 'name', + value: '/^jo.*/i', + filter_by: 'regex', + }); + 
const results = await harness.runSlice(testData); + + expect(results.length).toBe(3); + }); + + it('should return docs with value that match regex if invert true', async () => { + harness = await makeTest({ + field: 'name', + value: '/^jo.*/i', + filter_by: 'regex', + invert: true + }); + const results = await harness.runSlice(testData); + + expect(results.length).toBe(2); + }); + + it('should return docs with an ip value not in the range', async () => { + harness = await makeTest({ + field: 'ip', + value: '28.127.246.0/26', + filter_by: 'ip_range', + }); + const results = await harness.runSlice(testData); + + expect(results.length).toBe(4); + }); + + it('should return docs with an ip value in the given range and invert is true', async () => { + harness = await makeTest({ + field: 'ip', + value: '28.127.246.0/26', + filter_by: 'ip_range', + invert: true + }); + const results = await harness.runSlice(testData); + + expect(results.length).toBe(1); + }); + + it('should return docs without matching values for an array of values', async () => { + harness = await makeTest({ + field: 'ip', + value: ['28.127.246.232', '4.17.14.18'], + filter_by: 'match' + }); + const results = await harness.runSlice(testData); + + expect(results.length).toEqual(3); + }); + + it('should return docs that match and arrary values if invert is true', async () => { + harness = await makeTest({ + field: 'ip', + value: ['28.127.246.232', '4.17.14.18'], + filter_by: 'match', + invert: true + }); + const results = await harness.runSlice(testData); + + expect(results.length).toEqual(2); + }); + + it('should filter docs that match regex for an array of values', async () => { + harness = await makeTest({ + field: 'name', + value: ['/^jo.*/i', '/^g.*/i'], + filter_by: 'regex', + }); + const results = await harness.runSlice(testData); + + expect(results.length).toBe(2); + }); + + it('should filter docs that match regex for an array of values with invert', async () => { + harness = await makeTest({ + 
field: 'name', + value: ['/^jo.*/i', '/^g.*/i'], + filter_by: 'regex', + invert: true + }); + const results = await harness.runSlice(testData); + + expect(results.length).toBe(3); + }); + + it('should return docs with an ip value outside of all ranges of array of cidr values', async () => { + harness = await makeTest({ + field: 'ip', + value: ['28.127.246.0/26', '4.17.0.0/17'], + filter_by: 'ip_range', + }); + const results = await harness.runSlice(testData); + + expect(results.length).toBe(2); + }); + + it('should return docs with an ip value in any of the array of ip ranges and invert is true', async () => { + harness = await makeTest({ + field: 'ip', + value: ['28.127.246.0/26', '4.17.0.0/16'], + filter_by: 'ip_range', + invert: true + }); + const results = await harness.runSlice(testData); + + expect(results.length).toBe(3); + }); + + it('should be able to filter array field values by regex', async () => { + testData[0].name = ['bob', 'foo', 'man']; + testData[2].name = ['herm', 'max', 'bob']; + + harness = await makeTest({ + field: 'name', + value: 'bob', + filter_by: 'match', + }); + const results = await harness.runSlice(testData); + + expect(results.length).toBe(2); + expect(results[0]._key).toBe(1); + expect(results[1]._key).toBe(4); + }); + + it('should return everything if no match', async () => { + testData[0].name = ['bob', 'foo', 'man']; + testData[2].name = ['herm', 'max', 'bob']; + + harness = await makeTest({ + field: 'name', + value: 'grog', + filter_by: 'match', + }); + const results = await harness.runSlice(testData); + + expect(results.length).toBe(5); + }); + + it('should be able to keep array field values if invert is true', async () => { + testData[0].name = ['bob', 'foo', 'man']; + testData[2].name = ['herm', 'max', 'bob']; + + harness = await makeTest({ + field: 'name', + value: 'bob', + filter_by: 'match', + invert: true + }); + const results = await harness.runSlice(testData); + + expect(results.length).toBe(3); + 
expect(results[0]._key).toBe(0); + expect(results[1]._key).toBe(2); + expect(results[2]._key).toBe(3); + }); + + it('should return empty array if no match and invert it true', async () => { + testData[0].name = ['bob', 'foo', 'man']; + testData[2].name = ['herm', 'max', 'bob']; + + harness = await makeTest({ + field: 'name', + value: 'grog', + filter_by: 'match', + invert: true + }); + const results = await harness.runSlice(testData); + + expect(results.length).toBe(0); + }); + + it('should be able to filter values by array index', async () => { + testData[0].name = ['bob', 'foo', 'man']; + testData[2].name = ['herm', 'max', 'bob']; + testData[3].name = ['foo', 'man', 'cow']; + + harness = await makeTest({ + field: 'name', + value: 'foo', + array_index: 1, + filter_by: 'match' + }); + const results = await harness.runSlice(testData); + + expect(results.length).toBe(4); + }); + + it('should return values if array_index matches and invert it true', async () => { + testData[0].name = ['bob', 'foo', 'man']; + testData[2].name = ['herm', 'max', 'bob']; + testData[3].name = ['foo', 'man', 'cow']; + + harness = await makeTest({ + field: 'name', + value: 'foo', + array_index: 0, + filter_by: 'match', + invert: true + }); + const results = await harness.runSlice(testData); + + expect(results.length).toBe(1); + expect(results[0]._key).toBe(3); + }); + + it('should return all values if array_index does not match', async () => { + testData[0].name = ['bob', 'foo', 'man']; + testData[2].name = ['herm', 'max', 'bob']; + testData[3].name = ['foo', 'man', 'cow']; + + harness = await makeTest({ + field: 'name', + value: 'foo', + array_index: 2, + filter_by: 'match' + }); + const results = await harness.runSlice(testData); + + expect(results.length).toBe(5); + }); + + it('should return empty array if array_index does not match and invert true', async () => { + testData[0].name = ['bob', 'foo', 'man']; + testData[2].name = ['herm', 'max', 'bob']; + testData[3].name = ['foo', 'man', 
'cow']; + + harness = await makeTest({ + field: 'name', + value: 'foo', + array_index: 2, + filter_by: 'match', + invert: true + }); + const results = await harness.runSlice(testData); + + expect(results.length).toBe(0); + }); + + it('should not throw an error if array_index is larger than array length', async () => { + testData[0].name = ['bob', 'foo', 'man']; + testData[2].name = ['herm', 'max', 'bob']; + + harness = await makeTest({ + field: 'name', + value: 'foo', + array_index: 200, + filter_by: 'match', + }); + const results = await harness.runSlice(testData); + + expect(results.length).toBe(5); + }); + + describe('filter with numbers', () => { + const myTestData = [ + { + _key: 1, + number: 10 + }, + { + _key: 2, + number: 1 + }, + { + _key: 3, + number: -1 + }, + { + _key: 4, + number: 5 + }, + { + _key: 5, + number: 4 + }, + ]; + + it('should filter by validator args', async () => { + harness = await makeTest({ + field: 'number', + filter_by: 'validator', + validation_function: 'inNumberRange', + validation_function_args: { + min: 0, + max: 5, + inclusive: true, + }, + invert: true + }); + const results = await harness.runSlice(myTestData); + + expect(results.length).toBe(3); + + const keys = results.map((i) => i._key); + + expect(keys.includes(2)).toBeTrue(); + expect(keys.includes(4)).toBeTrue(); + expect(keys.includes(5)).toBeTrue(); + }); + }); + + describe('filter with geo', () => { + const myTestData = [ + { + _key: 1, + boundary: { + type: 'Polygon', + coordinates: 'coords be here' + } + }, + { + _key: 2, + boundary: { + type: 'LineString', + coordinates: 'coords be here' + } + }, + { + _key: 3, + boundary: { + type: 'MultiPolygon', + coordinates: 'coords be here' + } + } + ]; + + it('should filter docs based on nested values', async () => { + harness = await makeTest({ + field: 'boundary.type', + filter_by: 'match', + value: [ + 'LineString', + 'MultiPolygon' + ] + }); + const results = await harness.runSlice(myTestData); + + 
expect(results.length).toBe(1); + + expect(results[0]).toEqual( + { + _key: 1, + boundary: { + type: 'Polygon', + coordinates: 'coords be here' + } + } + ); + }); + + it('should return docs based on nested values if invert is true', async () => { + harness = await makeTest({ + field: 'boundary.type', + filter_by: 'match', + value: [ + 'LineString', + 'Polygon' + ], + invert: true + }); + const results = await harness.runSlice(myTestData); + + expect(results.length).toBe(2); + + expect(results).toEqual([ + { + _key: 1, + boundary: { + type: 'Polygon', + coordinates: 'coords be here' + } + }, + { + _key: 2, + boundary: { + type: 'LineString', + coordinates: 'coords be here' + } + } + ]); + }); + }); + + describe('filter by size', () => { + const myTestData = [ + { + _key: 1, + name: 'bob', + age: 12 + }, + { + _key: 2, + name: 'reginald of the northern countries', + age: 12, + more_stuff: { + a: 'thing', + b: 'do hicky', + c: [ + 'some', + 'type', + 'of', + 'array' + ] + } + } + ]; + + it('should filter doc based on size', async () => { + harness = await makeTest({ + field: 'doc', + filter_by: 'size', + value: 100 + }); + const results = await harness.runSlice(myTestData); + + expect(results.length).toBe(1); + + expect(results[0]).toEqual( + { + _key: 1, + name: 'bob', + age: 12 + } + ); + }); + + it('should return docs over filter size', async () => { + harness = await makeTest({ + field: 'doc', + filter_by: 'size', + value: 100, + invert: true + }); + const results = await harness.runSlice(myTestData); + + expect(results.length).toBe(1); + + expect(results[0]).toEqual( + { + _key: 2, + name: 'reginald of the northern countries', + age: 12, + more_stuff: { + a: 'thing', + b: 'do hicky', + c: [ + 'some', + 'type', + 'of', + 'array' + ] + } + } + ); + }); + + it('should filter by field size', async () => { + harness = await makeTest({ + field: 'name', + filter_by: 'size', + value: 10, + }); + const results = await harness.runSlice(myTestData); + + 
expect(results.length).toBe(1); + + expect(results[0]).toEqual( + { + _key: 1, + name: 'bob', + age: 12 + } + ); + }); + }); + + describe('filter with exception_rules', () => { + const myTestData = [ + { + _key: 1, + name: 'bob', + age: 22 + }, + { + _key: 2, + name: 'ray', + age: 44 + }, + { + _key: 3, + name: 'ran', + age: 25 + }, + { + _key: 4, + name: 'ran', + age: 99 + }, + { + _key: 5, + name: 'ran', + age: 66 + }, + { + _key: 6, + name: 'ran', + age: 44, + favorite_baseball_team: 'STL cardinals' + } + ]; + + it('should filter correctly if exception_rules is undefined', async () => { + harness = await makeTest({ + field: 'name', + value: 'ran' + }); + const results = await harness.runSlice(myTestData); + + expect(results).toEqual([ + { + _key: 1, + name: 'bob', + age: 22 + }, + { + _key: 2, + name: 'ray', + age: 44 + } + ]); + }); + + it('should allow for records that match the exception to bypass filter', async () => { + harness = await makeTest({ + field: 'name', + value: 'ran', + exception_rules: [ + { field: 'age', value: 99 } + ] + }); + const results = await harness.runSlice(myTestData); + + expect(results).toEqual([ + { + _key: 1, + name: 'bob', + age: 22 + }, + { + _key: 2, + name: 'ray', + age: 44 + }, + { + _key: 4, + name: 'ran', + age: 99 + } + ]); + }); + + it('should allow for records that match any exception rules to bypass filter', async () => { + harness = await makeTest({ + field: 'name', + value: 'ran', + exception_rules: [ + { field: 'age', value: 99 }, + { field: '_key', value: 5 } + ] + }); + const results = await harness.runSlice(myTestData); + + expect(results).toEqual([ + { + _key: 1, + name: 'bob', + age: 22 + }, + { + _key: 2, + name: 'ray', + age: 44 + }, + { + _key: 4, + name: 'ran', + age: 99 + }, + { + _key: 5, + name: 'ran', + age: 66 + } + ]); + }); + + it('should handle a regex in the exception rules', async () => { + harness = await makeTest({ + field: 'name', + value: 'ran', + exception_rules: [ + { field: 
'favorite_baseball_team', value: '/^stl/i', regex: true } + ] + }); + const results = await harness.runSlice(myTestData); + + expect(results).toEqual([ + { + _key: 1, + name: 'bob', + age: 22 + }, + { + _key: 2, + name: 'ray', + age: 44 + }, + { + _key: 6, + name: 'ran', + age: 44, + favorite_baseball_team: 'STL cardinals' + } + ]); + }); + + it('should validate exception rules', async () => { + await expect(makeTest({ + field: 'name', + value: 'ran', + exception_rules: [ + // @ts-expect-error + { field: 'favorite_baseball_team', regex: true } + ] + })).rejects.toThrow(); + }); + }); + + describe('filter with field array', () => { + const myTestData = [ + { + _key: 1, + name: 'ray', + last_name: 'bob', + age: 20 + }, + { + _key: 2, + name: 'joe', + last_name: 'ray', + age: 24 + }, + { + _key: 3, + name: 'ray', + last_name: 'smith', + age: 23 + }, + { + _key: 4, + name: 'joe', + last_name: 'smith', + age: 21 + }, + { + _key: 5, + name: 'harty', + last_name: 'day', + age: 22 + } + ]; + + it('should handle an array of fields', async () => { + harness = await makeTest({ + field: ['name', 'last_name'], + value: 'ray' + }); + const results = await harness.runSlice(myTestData); + + expect(results).toEqual([ + { + _key: 4, + name: 'joe', + last_name: 'smith', + age: 21 + }, + { + _key: 5, + name: 'harty', + last_name: 'day', + age: 22 + } + ]); + }); + + it('should handle an array of fields with invert set to true', async () => { + harness = await makeTest({ + field: ['name', 'last_name'], + value: 'ray', + invert: true + }); + const results = await harness.runSlice(myTestData); + + expect(results).toEqual([ + { + _key: 1, + name: 'ray', + last_name: 'bob', + age: 20 + }, + { + _key: 2, + name: 'joe', + last_name: 'ray', + age: 24 + }, + { + _key: 3, + name: 'ray', + last_name: 'smith', + age: 23 + } + ]); + }); + }); +}); diff --git a/test/filter/schema-spec.ts b/test/filter/schema-spec.ts new file mode 100644 index 00000000..c8aec423 --- /dev/null +++ 
b/test/filter/schema-spec.ts @@ -0,0 +1,56 @@ +import { WorkerTestHarness } from 'teraslice-test-harness'; +import { OpConfig } from '@terascope/job-components'; + +describe('filter schema', () => { + let harness: WorkerTestHarness; + const name = 'filter'; + + async function makeSchema(config: Record = {}): Promise { + const opConfig = Object.assign({}, { _op: name }, config); + harness = WorkerTestHarness.testProcessor(opConfig); + + await harness.initialize(); + + const validConfig = harness.executionContext.config.operations.find( + (testConfig) => testConfig._op === name + ); + + return validConfig as OpConfig; + } + + afterEach(async () => { + if (harness) await harness.shutdown(); + }); + + it('should expect to be properly configured', async () => { + await expect(makeSchema({})).toReject(); + await expect(makeSchema({ field: 1234 })).toReject(); + await expect(makeSchema({ field: 'some_destination', invert: 1234 })).toReject(); + await expect(makeSchema({ array_index: true, field: 'field' })).toReject(); + await expect(makeSchema({ field: 'field', filter_by: 'field' })).toReject(); + await expect(makeSchema({ field: 'field', validation_function: 'field' })).toReject(); + + await expect(makeSchema({ + field: [1234, 'last_name'], + value: 'ray', + invert: true + })).toReject(); + + await expect(makeSchema({ + field: undefined, + value: 'ray', + invert: true + })).toReject(); + + await expect(makeSchema({ + field: 'name', + value: 'ran', + exception_rules: [ + { field: 'favorite_baseball_team', regex: true } + ] + })).toReject(); + + await expect(makeSchema({ field: 'someField' })).toResolve(); + await expect(makeSchema({ field: 'someField', meta_key: 'some_key' })).toResolve(); + }); +}); diff --git a/test/filter_by_date/processor-spec.ts b/test/filter_by_date/processor-spec.ts new file mode 100644 index 00000000..cda58fe1 --- /dev/null +++ b/test/filter_by_date/processor-spec.ts @@ -0,0 +1,203 @@ +import { subtractFromDate, addToDate, getTime } from 
'@terascope/job-components'; +import { WorkerTestHarness } from 'teraslice-test-harness'; +import { FilterByDateConfig } from '../../asset/src/filter_by_date/interfaces.js'; + +const nowDate = new Date(); +const currentTimeMilliSeconds = getTime(nowDate) as number; +const currentISO8601 = new Date().toISOString(); +const oneWeekAgoIso8601 = new Date( + Number(currentTimeMilliSeconds) - (7 * 24 * 3600 * 1000) +).toISOString(); +const veryLongTimeAgo = currentTimeMilliSeconds - (3600 * 24 * 365 * 1000000000 * 1000); +const farIntoTheFuture = currentTimeMilliSeconds + (3600 * 24 * 365 * 100000000 * 1000); + +const referenceDate = new Date().toISOString(); + +const jsonData = [ + { + id: 1, + timestamp: currentTimeMilliSeconds + }, + { + id: 3, + timestamp: veryLongTimeAgo + }, + { + id: 4, + timestamp: farIntoTheFuture + }, + { + id: 5, + timestamp: currentISO8601 + }, + { + id: 6, + timestamp: oneWeekAgoIso8601 + }, + { + id: 7, + timestamp: '2018-01-30T23:17:58.000Z' + }, + { + id: 8, + timestamp: '2040-04-29T23:17:58.000Z' + }, + { + id: 9, + timestamp: 'bad date' + }, + { + id: 10, + timestamp: 315359998474698950000 + }, + { + id: 11, + timestamp: -315359998474698950000 + }, + { + id: 12, + timestamp: 0 + }, + { + id: 13, + timestamp: 'false' + }, + { + id: 14, + timestamp: ' ' + }, + { + id: 15, + timestamp: 'unknown' + }, + { + id: 16, + timestamp: '' + }, + { + id: 17, + timestamp: true + }, + { + id: 18, + timestamp: new Date() + } +]; + +describe('filter_by_date', () => { + let harness: WorkerTestHarness; + + async function makeTest(config: Partial = {}) { + const baseConfig = { + _op: 'filter_by_date', + }; + const opConfig = Object.assign({}, baseConfig, config); + harness = WorkerTestHarness.testProcessor(opConfig); + + await harness.initialize(); + + return harness; + } + + afterEach(async () => { + if (harness) { + await harness.shutdown(); + } + }); + + it('should generate an empty result if no input data', async () => { + harness = await makeTest(); + 
const results = await harness.runSlice([]); + + expect(results.length).toEqual(0); + }); + + it('should return only documents that have a date within the date guards (short range)', async () => { + harness = await makeTest({ + date_field: 'timestamp', + limit_past: '2week', + limit_future: '2day' + }); + const results = await harness.runSlice(jsonData); + + expect(results.length).toBe(4); + }); + + it('should return only documents that have a date within the date guards (large range)', async () => { + harness = await makeTest({ + date_field: 'timestamp', + limit_past: '1000Y', + limit_future: '100000day' + }); + const results = await harness.runSlice(jsonData); + + expect(results.length).toBe(7); + }); + + it('data should be unchanged by filter_by_date', async () => { + const data = [ + { timestamp: Date.now(), ip: '116.206.15.22' }, + { timestamp: Date.now(), ip: '114.125.58.223' }, + { timestamp: Date.now(), ip: '177.79.65.34' }, + { timestamp: Date.now(), ip: '223.39.145.50' }, + { timestamp: Date.now(), ip: '223.33.181.51' } + ]; + + harness = await makeTest({ + date_field: 'timestamp', + limit_past: '5Y', + limit_future: '2day' + }); + const results = await harness.runSlice(data); + + expect(results[0].ip).toEqual('116.206.15.22'); + expect(results[1].ip).toEqual('114.125.58.223'); + expect(results[2].ip).toEqual('177.79.65.34'); + expect(results[3].ip).toEqual('223.39.145.50'); + expect(results[4].ip).toBe('223.33.181.51'); + }); + + it('should handle set date and time for past guard', async () => { + const limitPast = subtractFromDate(referenceDate, { days: 2 }); + + const testData = [...new Array(5)].map((x, i) => { + const date = subtractFromDate(referenceDate, { days: i }); + const doc = { + _key: i, + date: new Date(date).toISOString() + }; + + return doc; + }); + + harness = await makeTest({ + limit_past: new Date(limitPast).toISOString(), + date_field: 'date' + }); + const results = await harness.runSlice(testData); + + 
expect(results.length).toEqual(3); + }); + + it('should handle set date and time for future guard', async () => { + const limitFuture = addToDate(referenceDate, { minutes: 10 }); + + const testData = [...new Array(5)].map((x, i) => { + const date = addToDate(referenceDate, { minutes: i * 10 }); + const doc = { + _key: i, + date: new Date(date).toISOString() + }; + + return doc; + }); + + harness = await makeTest({ + limit_future: new Date(limitFuture).toISOString(), + date_field: 'date' + }); + const results = await harness.runSlice(testData); + + expect(results.length).toEqual(2); + }); +}); diff --git a/test/filter_by_date/schema-spec.ts b/test/filter_by_date/schema-spec.ts new file mode 100644 index 00000000..fd1288c7 --- /dev/null +++ b/test/filter_by_date/schema-spec.ts @@ -0,0 +1,36 @@ +import { WorkerTestHarness } from 'teraslice-test-harness'; +import { OpConfig, makeISODate } from '@terascope/job-components'; + +describe('filter_by_date schema', () => { + let harness: WorkerTestHarness; + const name = 'filter_by_date'; + + async function makeSchema(config: Record = {}): Promise { + const opConfig = Object.assign({}, { _op: name }, config); + harness = WorkerTestHarness.testProcessor(opConfig); + + await harness.initialize(); + + const validConfig = harness.executionContext.config.operations.find( + (testConfig) => testConfig._op === name + ); + + return validConfig as OpConfig; + } + + afterEach(async () => { + if (harness) await harness.shutdown(); + }); + + it('should expect to be properly configured', async () => { + await expect(makeSchema({ date_field: ['some stuff'] })).toReject(); + await expect(makeSchema({ date_field: true, limit_past: 'field' })).toReject(); + await expect(makeSchema({ date_field: 'field', limit_past: 1234 })).toReject(); + await expect(makeSchema({ date_field: 'field', limit_past: 'hello world' })).toReject(); + + await expect(makeSchema({ date_field: 'someField' })).toResolve(); + await expect(makeSchema({ date_field: 
'someField', limit_past: '1week' })).toResolve(); + await expect(makeSchema({ date_field: 'someField', limit_future: '1week' })).toResolve(); + await expect(makeSchema({ date_field: 'someField', limit_future: makeISODate() })).toResolve(); + }); +}); diff --git a/test/filter_by_required_fields/processor-spec.ts b/test/filter_by_required_fields/processor-spec.ts new file mode 100644 index 00000000..2e808c81 --- /dev/null +++ b/test/filter_by_required_fields/processor-spec.ts @@ -0,0 +1,182 @@ +import { DataEntity } from '@terascope/job-components'; +import { WorkerTestHarness } from 'teraslice-test-harness'; +import { FilterByRequiredFieldConfig, LogicType } from '../../asset/src/filter_by_required_fields/interfaces.js'; + +describe('filter_by_required_fields', () => { + let harness: WorkerTestHarness; + + async function makeTest(config: Partial = {}) { + const baseConfig = { + _op: 'filter_by_required_fields', + required_fields: ['age', 'name', 'size'] + }; + + const opConfig = Object.assign({}, baseConfig, config); + harness = WorkerTestHarness.testProcessor(opConfig); + + await harness.initialize(); + + return harness; + } + + afterEach(async () => { + if (harness) await harness.shutdown(); + }); + + it('should return empty array from empty values', async () => { + harness = await makeTest(); + const results = await harness.runSlice([]); + + expect(results.length).toBe(0); + }); + + it('should return only docs with all legit values', async () => { + const data = [ + { + age: 20, + name: 'bob1', + size: 10 + }, + { + name: 'bob2', + size: 11 + }, + { + age: 21, + size: 12 + }, + { + age: 22, + name: 'bob3', + }, + { + goop: true + }, + { + age: undefined, + name: 'bob4', + size: 13 + }, + { + age: 23, + name: 'NA', + size: 14 + }, + { + age: 24, + name: 'bob5', + size: '' + }, + { + age: 25, + name: 'bob6', + size: null + }, + { + age: 26, + name: 'bob7', + size: 15 + } + ]; + + harness = await makeTest(); + const results = await harness.runSlice(data); + + 
expect(results.length).toBe(4); + }); + + it('can work with OR statements', async () => { + const data = [ + { + age: 20, + name: 'bob1', + size: 10 + }, + { + name: 'bob2', + }, + { + age: 21, + size: 12 + }, + { + age: 22, + name: 'bob3', + }, + { + goop: true, + name: 'bob', + date: 'sometime' + }, + { + age: 25, + name: 'bob6', + size: null + }, + { + age: null, + name: 'bob7', + size: null + } + ]; + + harness = await makeTest({ + required_fields: ['age', 'size'], + filter_type: LogicType.OR + }); + const results = await harness.runSlice(data); + + expect(results.length).toBe(4); + }); + + it('can invert the data', async () => { + harness = await makeTest({ + required_fields: ['age', 'size'], + filter_type: LogicType.OR, + invert: true + }); + + const data = [ + { + age: 20, + name: 'bob1', + size: 10 + }, + { + name: 'bob2', + }, + { + age: 21, + size: 12 + }, + { + age: 22, + name: 'bob3', + }, + { + goop: true, + name: 'bob', + date: 'sometime' + }, + { + age: 25, + name: 'bob6', + size: null + }, + { + age: null, + name: 'bob7', + size: null + } + ]; + + const results = await harness.runSlice(data); + + expect(results.length).toBe(3); + expect(results).toEqual([ + DataEntity.make({ name: 'bob2' }), + DataEntity.make({ goop: true, name: 'bob', date: 'sometime' }), + DataEntity.make({ age: null, name: 'bob7', size: null }) + ]); + }); +}); diff --git a/test/filter_by_required_fields/schema-spec.ts b/test/filter_by_required_fields/schema-spec.ts new file mode 100644 index 00000000..46eb6a57 --- /dev/null +++ b/test/filter_by_required_fields/schema-spec.ts @@ -0,0 +1,34 @@ +import { WorkerTestHarness } from 'teraslice-test-harness'; +import { OpConfig } from '@terascope/job-components'; + +describe('filter_by_required_fields schema', () => { + let harness: WorkerTestHarness; + const name = 'filter_by_required_fields'; + + async function makeSchema(config: Record = {}): Promise { + const opConfig = Object.assign({}, { _op: name }, config); + harness = 
WorkerTestHarness.testProcessor(opConfig); + + await harness.initialize(); + + const validConfig = harness.executionContext.config.operations.find( + (testConfig) => testConfig._op === name + ); + + return validConfig as OpConfig; + } + + afterEach(async () => { + if (harness) await harness.shutdown(); + }); + + it('should expect to be properly configured', async () => { + await expect(makeSchema({})).toReject(); + await expect(makeSchema({ required_fields: ['someField'], filter_type: 1234 })).toReject(); + await expect(makeSchema({ required_fields: ['someField'], filter_type: 'something' })).toReject(); + await expect(makeSchema({ required_fields: ['someField'], filter_type: 'OR', invert: 1234 })).toReject(); + + await expect(makeSchema({ required_fields: ['someField'] })).toResolve(); + await expect(makeSchema({ required_fields: ['someField'], filter_type: 'OR', invert: false })).toResolve(); + }); +}); diff --git a/test/filter_by_unknown_fields/processor-spec.ts b/test/filter_by_unknown_fields/processor-spec.ts new file mode 100644 index 00000000..26c9acb8 --- /dev/null +++ b/test/filter_by_unknown_fields/processor-spec.ts @@ -0,0 +1,105 @@ +import { cloneDeep } from '@terascope/job-components'; +import { WorkerTestHarness } from 'teraslice-test-harness'; +import { FilterByUnknownFieldsConfig } from '../../asset/src/filter_by_unknown_fields/interfaces.js'; + +const data = [ + { + name: 'joe', + age: 32, + height: 100 + }, + { + name: 'mel', + age: 20, + height: 200 + }, + { + name: 'tim', + age: 33, + height: 150, + weight: 2022 + }, + { + name: 'red', + age: 38, + height: 120 + }, + { + name: 'frey', + age: 48, + height: 125 + } +]; + +describe('filter_by_unknown_fields', () => { + let harness: WorkerTestHarness; + + async function makeTest(config: Partial = {}) { + const baseConfig = { + _op: 'filter_by_unknown_fields', + known_fields: ['name', 'age', 'height'] + }; + + const opConfig = Object.assign({}, baseConfig, config); + harness = 
WorkerTestHarness.testProcessor(opConfig); + + await harness.initialize(); + + return harness; + } + + afterEach(async () => { + if (harness) await harness.shutdown(); + }); + + it('should return an empty array from an empty array', async () => { + harness = await makeTest(); + const results = await harness.runSlice([]); + + expect(results.length).toEqual(0); + }); + + it('should return only records that have known fields', async () => { + harness = await makeTest(); + const results = await harness.runSlice(cloneDeep(data)); + + expect(results).toEqual( + [ + { + name: 'joe', + age: 32, + height: 100 + }, + { + name: 'mel', + age: 20, + height: 200 + }, + { + name: 'red', + age: 38, + height: 120 + }, + { + name: 'frey', + age: 48, + height: 125 + } + ] + ); + }); + + it('should return only records that have unknown fields if invert is true', async () => { + harness = await makeTest({ invert: true }); + const results = await harness.runSlice(cloneDeep(data)); + + expect(results).toEqual([ + { + name: 'tim', + age: 33, + height: 150, + weight: 2022 + } + ]); + }); +}); diff --git a/test/filter_by_unknown_fields/schema-spec.ts b/test/filter_by_unknown_fields/schema-spec.ts new file mode 100644 index 00000000..4ba03d80 --- /dev/null +++ b/test/filter_by_unknown_fields/schema-spec.ts @@ -0,0 +1,32 @@ +import { WorkerTestHarness } from 'teraslice-test-harness'; +import { OpConfig } from '@terascope/job-components'; + +describe('filter_by_unknown_fields schema', () => { + let harness: WorkerTestHarness; + const name = 'filter_by_unknown_fields'; + + async function makeSchema(config: Record = {}): Promise { + const opConfig = Object.assign({}, { _op: name }, config); + harness = WorkerTestHarness.testProcessor(opConfig); + + await harness.initialize(); + + const validConfig = harness.executionContext.config.operations.find( + (testConfig) => testConfig._op === name + ); + + return validConfig as OpConfig; + } + + afterEach(async () => { + if (harness) await 
harness.shutdown(); + }); + + it('should expect to be properly configured', async () => { + await expect(makeSchema({})).toReject(); + await expect(makeSchema({ known_fields: ['someField'], invert: 1234 })).toReject(); + + await expect(makeSchema({ known_fields: ['someField'] })).toResolve(); + await expect(makeSchema({ known_fields: ['someField'], invert: false })).toResolve(); + }); +}); diff --git a/test/json_parser/processor-spec.ts b/test/json_parser/processor-spec.ts new file mode 100644 index 00000000..0195356c --- /dev/null +++ b/test/json_parser/processor-spec.ts @@ -0,0 +1,82 @@ +import { cloneDeep, DataEntity, isString } from '@terascope/utils'; +import { WorkerTestHarness } from 'teraslice-test-harness'; +import { OpConfig } from '@terascope/job-components'; + +describe('json_parser', () => { + let harness: WorkerTestHarness; + + async function makeTest(config: Partial = {}) { + const baseConfig = { + _op: 'json_parser', + }; + + const opConfig = Object.assign({}, baseConfig, config); + harness = WorkerTestHarness.testProcessor(opConfig); + + await harness.initialize(); + + return harness; + } + + afterEach(async () => { + if (harness) await harness.shutdown(); + }); + + it('should return empty array if input is an empty array', async () => { + harness = await makeTest(); + const results = await harness.runSlice([]); + + expect(results.length).toBe(0); + }); + + it('should parse valid json', async () => { + const data = [ + { + _key: 1, + name: 'bob' + }, + { + _key: 2, + name: 'joe' + }, + ]; + + const rawData = makeRawDataEntities(cloneDeep(data)); + + harness = await makeTest(); + const results = await harness.runSlice(rawData); + + expect(results).toEqual(data); + }); + + it('should only return the good json', async () => { + const data = [ + 'somebadjson', + { + _key: 2, + name: 'joe' + }, + ]; + + const rawData = makeRawDataEntities(cloneDeep(data)); + + harness = await makeTest(); + const results = await harness.runSlice(rawData); + + 
expect(results).toEqual([{ _key: 2, name: 'joe' }]); + }); +}); + +function makeRawDataEntities(dataArray: any[]) { + return dataArray.map((doc) => { + let d = doc; + if (isString(doc)) d = {}; + const entity = DataEntity.make(d, { _key: doc._key }); + + const buf = Buffer.from(JSON.stringify(doc), 'utf8'); + + entity.setRawData(buf); + + return entity; + }); +} diff --git a/test/json_parser/schema-spec.ts b/test/json_parser/schema-spec.ts new file mode 100644 index 00000000..b8beb7ad --- /dev/null +++ b/test/json_parser/schema-spec.ts @@ -0,0 +1,34 @@ +import { WorkerTestHarness } from 'teraslice-test-harness'; +import { OpConfig } from '@terascope/job-components'; + +// TODO: check check if api name is real and available + +describe('json_parser schema', () => { + let harness: WorkerTestHarness; + const name = 'json_parser'; + + async function makeSchema(config: Record = {}): Promise { + const opConfig = Object.assign({}, { _op: name }, config); + harness = WorkerTestHarness.testProcessor(opConfig); + + await harness.initialize(); + + const validConfig = harness.executionContext.config.operations.find( + (testConfig) => testConfig._op === name + ); + + return validConfig as OpConfig; + } + + afterEach(async () => { + if (harness) await harness.shutdown(); + }); + + it('should expect to be properly configured', async () => { + await expect(makeSchema({ _dead_letter_action: ['some stuff'] })).toReject(); + await expect(makeSchema({ _dead_letter_action: 1234 })).toReject(); + + await expect(makeSchema({ _dead_letter_action: 'none' })).toResolve(); + await expect(makeSchema({ _dead_letter_action: 'some_api_name' })).toResolve(); + }); +}); diff --git a/test/remove_empty_fields/processor-spec.ts b/test/remove_empty_fields/processor-spec.ts new file mode 100644 index 00000000..b8fa89a3 --- /dev/null +++ b/test/remove_empty_fields/processor-spec.ts @@ -0,0 +1,91 @@ +import { WorkerTestHarness } from 'teraslice-test-harness'; + +describe('remove_empty_fields', () => { + 
let harness: WorkerTestHarness; + + async function makeTest(config = {}) { + const baseConfig = { + _op: 'remove_empty_fields', + }; + const opConfig = Object.assign({}, baseConfig, config); + harness = WorkerTestHarness.testProcessor(opConfig); + + await harness.initialize(); + + return harness; + } + + afterEach(async () => { + if (harness) await harness.shutdown(); + }); + + const testData = [ + { + id: 1, + name: 'joe', + age: 102.875 + }, + { + id: 2, + name: '', + age: 23, + happy: true, + field: [], + field2: {}, + field3: undefined, + field4: null, + field5: 'UNDEFINED' + }, + { + id: 3, + name: 'bob', + age: '', + happy: false, + field7: ['thing1', 'thing2'], + field8: { foo: 'bar' } + }, + { + id: 4, + name: ' ', + age: '', + size: '' + } + ]; + + it('generate an empty result if no input data', async () => { + harness = await makeTest(); + const results = await harness.runSlice([]); + + expect(results.length).toBe(0); + }); + + it('remove empty fields from records', async () => { + harness = await makeTest(); + const results = await harness.runSlice(testData); + + expect(results.length).toBe(4); + + expect(results[0]).toEqual({ + id: 1, name: 'joe', age: 102.875 + }); + + expect(results[1]).toEqual({ + id: 2, + age: 23, + happy: true, + field5: 'UNDEFINED' + }); + + expect(results[2]).toEqual({ + id: 3, + name: 'bob', + happy: false, + field7: ['thing1', 'thing2'], + field8: { foo: 'bar' } + }); + + expect(results[3]).toEqual({ + id: 4 + }); + }); +}); diff --git a/test/remove_empty_fields/schema-spec.ts b/test/remove_empty_fields/schema-spec.ts new file mode 100644 index 00000000..9109ee29 --- /dev/null +++ b/test/remove_empty_fields/schema-spec.ts @@ -0,0 +1,28 @@ +import { WorkerTestHarness } from 'teraslice-test-harness'; +import { OpConfig } from '@terascope/job-components'; + +describe('remove_empty_fields schema', () => { + let harness: WorkerTestHarness; + const name = 'remove_empty_fields'; + + async function makeSchema(config: Record = {}): 
Promise { + const opConfig = Object.assign({}, { _op: name }, config); + harness = WorkerTestHarness.testProcessor(opConfig); + + await harness.initialize(); + + const validConfig = harness.executionContext.config.operations.find( + (testConfig) => testConfig._op === name + ); + + return validConfig as OpConfig; + } + + afterEach(async () => { + if (harness) await harness.shutdown(); + }); + + it('can be instantiated, but has no fields', async () => { + await expect(makeSchema({})).toResolve(); + }); +}); diff --git a/test/routed_sender/processor-spec.ts b/test/routed_sender/processor-spec.ts index e4411c3a..d4238832 100644 --- a/test/routed_sender/processor-spec.ts +++ b/test/routed_sender/processor-spec.ts @@ -1,8 +1,6 @@ import 'jest-extended'; import { WorkerTestHarness, newTestJobConfig } from 'teraslice-test-harness'; -import { - isEmpty, DataEntity, get, RouteSenderAPI -} from '@terascope/job-components'; +import { isEmpty, DataEntity, get, RouteSenderAPI } from '@terascope/job-components'; import path from 'node:path'; import { fileURLToPath } from 'node:url'; import TestApi from '../fixtures/someAssetId/test_api/api.js'; diff --git a/test/sample_exact/processor-spec.ts b/test/sample_exact/processor-spec.ts new file mode 100644 index 00000000..6e02ebe9 --- /dev/null +++ b/test/sample_exact/processor-spec.ts @@ -0,0 +1,117 @@ +import { WorkerTestHarness } from 'teraslice-test-harness'; +import { SampleExactConfig } from '../../asset/src/sample_exact/interfaces.js'; + +describe('sample_exact', () => { + let harness: WorkerTestHarness; + + async function makeTest(config: Partial = {}) { + const baseConfig = { + _op: 'sample_exact', + }; + const opConfig = Object.assign({}, baseConfig, config); + harness = WorkerTestHarness.testProcessor(opConfig); + + await harness.initialize(); + + return harness; + } + + afterEach(async () => { + if (harness) await harness.shutdown(); + }); + + it('with default settings, should return empty array from empty array', async () 
=> { + harness = await makeTest(); + const results = await harness.runSlice([]); + + expect(results).toEqual([]); + }); + + it('with default settings, should return all the data', async () => { + const data = makeData(10); + harness = await makeTest(); + const results = await harness.runSlice(data); + + expect(results.length).toEqual(10); + }); + + it('shuffles the data', async () => { + const data = makeData(10); + harness = await makeTest(); + const results = await harness.runSlice(data); + + const outOfOrder = results.some((record, index) => { + return record._key !== data[index]._key; + }); + + expect(outOfOrder).toBeTrue(); + }); + + it('with 0%, should return none of the data', async () => { + const data = makeData(10); + harness = await makeTest({ percent_kept: 0 }); + const results = await harness.runSlice(data); + + expect(results.length).toEqual(0); + }); + + it('with 50%, should return half all the data', async () => { + const data = makeData(10); + harness = await makeTest({ percent_kept: 50 }); + const results = await harness.runSlice(data); + + expect(results.length).toEqual(5); + }); + + it('with 100%, should return all data', async () => { + const data = makeData(10); + harness = await makeTest({ percent_kept: 100 }); + const results = await harness.runSlice(data); + + expect(results.length).toEqual(10); + }); + + it('with small data, and a high enough percentage, will return 0', async () => { + const data = makeData(3); + harness = await makeTest({ percent_kept: 25 }); + const results = await harness.runSlice(data); + + expect(results.length).toEqual(0); + }); + + it('with large datasets and 95%', async () => { + const data = makeData(10000); + harness = await makeTest({ percent_kept: 95 }); + const results = await harness.runSlice(data); + + expect(results.length).toEqual(9500); + }); + + it('with large datasets and 50%', async () => { + const data = makeData(10000); + harness = await makeTest({ percent_kept: 50 }); + const results = await 
harness.runSlice(data); + + expect(results.length).toEqual(5000); + }); +}); + +interface FakeData { + _key: number; + name: string; + age: string; +} + +function makeData(n: number): FakeData[] { + const bunchesOData = []; + + for (let i = 0; i < n; i++) { + bunchesOData.push({ + _key: i, + name: 'name', + age: 'age' + }); + } + + return bunchesOData; +} diff --git a/test/sample_exact/schema-spec.ts b/test/sample_exact/schema-spec.ts new file mode 100644 index 00000000..22ffcad8 --- /dev/null +++ b/test/sample_exact/schema-spec.ts @@ -0,0 +1,33 @@ +import { WorkerTestHarness } from 'teraslice-test-harness'; +import { OpConfig } from '@terascope/job-components'; + +describe('sample_exact schema', () => { + let harness: WorkerTestHarness; + const name = 'sample_exact'; + + async function makeSchema(config: Record = {}): Promise { + const opConfig = Object.assign({}, { _op: name }, config); + harness = WorkerTestHarness.testProcessor(opConfig); + + await harness.initialize(); + + const validConfig = harness.executionContext.config.operations.find( + (testConfig) => testConfig._op === name + ); + + return validConfig as OpConfig; + } + + afterEach(async () => { + if (harness) await harness.shutdown(); + }); + + it('should expect to be properly configured', async () => { + await expect(makeSchema({ percent_kept: 1234 })).toReject(); + await expect(makeSchema({ percent_kept: ['some stuff'] })).toReject(); + await expect(makeSchema({ percent_kept: null })).toReject(); + + await expect(makeSchema({})).toResolve(); + await expect(makeSchema({ percent_kept: 50 })).toResolve(); + }); +}); diff --git a/test/sample_random/processor-spec.ts b/test/sample_random/processor-spec.ts new file mode 100644 index 00000000..b3b7adaa --- /dev/null +++ b/test/sample_random/processor-spec.ts @@ -0,0 +1,100 @@ +import { WorkerTestHarness } from 'teraslice-test-harness'; +import { SampleRandomConfig } from '../../asset/src/sample_random/interfaces.js'; + +describe('sample_random', () => { + 
let harness: WorkerTestHarness; + + async function makeTest(config: Partial = {}) { + const baseConfig = { + _op: 'sample_random', + }; + const opConfig = Object.assign({}, baseConfig, config); + harness = WorkerTestHarness.testProcessor(opConfig); + + await harness.initialize(); + + return harness; + } + + afterEach(async () => { + if (harness) await harness.shutdown(); + }); + + it('with default settings, should return empty array from empty array', async () => { + harness = await makeTest(); + const results = await harness.runSlice([]); + + expect(results).toEqual([]); + }); + + it('with default settings, should return all the data', async () => { + const data = makeData(10); + harness = await makeTest(); + const results = await harness.runSlice(data); + + expect(results.length).toEqual(10); + }); + + it('with 0%, should return none the data', async () => { + const data = makeData(10); + harness = await makeTest({ probability_to_keep: 0 }); + const results = await harness.runSlice(data); + + expect(results.length).toEqual(0); + }); + + it('with 50%, should return half all the data', async () => { + const data = makeData(10); + harness = await makeTest({ probability_to_keep: 50 }); + const results = await harness.runSlice(data); + + expect(results.length).toBeLessThan(10); + expect(results.length).toBeGreaterThan(0); + }); + + it('with 100%, should return all data', async () => { + const data = makeData(10); + harness = await makeTest({ probability: 100 }); + const results = await harness.runSlice(data); + + expect(results.length).toEqual(10); + }); + + it('with large datasets and 95%', async () => { + const data = makeData(10000); + harness = await makeTest({ probability_to_keep: 95 }); + const results = await harness.runSlice(data); + + expect(results.length).toBeLessThan(9800); + expect(results.length).toBeGreaterThan(9200); + }); + + it('with large datasets and 50%', async () => { + const data = makeData(10000); + harness = await makeTest({ 
probability_to_keep: 50 }); + const results = await harness.runSlice(data); + + expect(results.length).toBeLessThan(5400); + expect(results.length).toBeGreaterThan(4600); + }); +}); + +interface FakeData { + _key: number; + name: string; + age: string; +} + +function makeData(n: number): FakeData[] { + const bunchesOData = []; + + for (let i = 0; i < n; i++) { + bunchesOData.push({ + _key: i, + name: 'name', + age: 'age' + }); + } + + return bunchesOData; +} diff --git a/test/sample_random/schema-spec.ts b/test/sample_random/schema-spec.ts new file mode 100644 index 00000000..d1ec3254 --- /dev/null +++ b/test/sample_random/schema-spec.ts @@ -0,0 +1,33 @@ +import { WorkerTestHarness } from 'teraslice-test-harness'; +import { OpConfig } from '@terascope/job-components'; + +describe('sample_random schema', () => { + let harness: WorkerTestHarness; + const name = 'sample_random'; + + async function makeSchema(config: Record = {}): Promise { + const opConfig = Object.assign({}, { _op: name }, config); + harness = WorkerTestHarness.testProcessor(opConfig); + + await harness.initialize(); + + const validConfig = harness.executionContext.config.operations.find( + (testConfig) => testConfig._op === name + ); + + return validConfig as OpConfig; + } + + afterEach(async () => { + if (harness) await harness.shutdown(); + }); + + it('should expect to be properly configured', async () => { + await expect(makeSchema({ probability_to_keep: 1234 })).toReject(); + await expect(makeSchema({ probability_to_keep: ['some stuff'] })).toReject(); + await expect(makeSchema({ probability_to_keep: null })).toReject(); + + await expect(makeSchema({})).toResolve(); + await expect(makeSchema({ probability_to_keep: 50 })).toResolve(); + }); +}); diff --git a/test/set_field_conditional/processor-spec.ts b/test/set_field_conditional/processor-spec.ts new file mode 100644 index 00000000..e9d33076 --- /dev/null +++ b/test/set_field_conditional/processor-spec.ts @@ -0,0 +1,89 @@ +import { 
WorkerTestHarness } from 'teraslice-test-harness'; +import { SetFieldConditionalConfig } from '../../asset/src/set_field_conditional/interfaces.js'; + +const testData = [ + { + id: 1, + type: 'data1' + }, + { + id: 2, + type: 'data2' + }, + { + id: 3, + type: 'data3' + } +]; + +const testData2 = [ + { + id: 1, + test_prop: 'value' + }, + { + id: 2, + test_prop: 10 + }, + { + id: 3, + test_prop: 100 + }, + { + id: 4, + test_prop: 1000, + type: 'data3' + } +]; + +describe('set_field_conditional', () => { + let harness: WorkerTestHarness; + + async function makeTest(config: Partial = {}) { + const baseConfig = { + _op: 'set_field_conditional', + }; + const opConfig = Object.assign({}, baseConfig, config); + harness = WorkerTestHarness.testProcessor(opConfig); + + await harness.initialize(); + + return harness; + } + + const valueCheckOpConfig = { + _op: 'set_field_conditional', + conditional_field: 'type', + conditional_values: ['data1', 'data2'], + set_field: 'test_prop', + value: true + }; + + afterEach(async () => { + if (harness) await harness.shutdown(); + }); + + it('generate an empty result if no input data', async () => { + harness = await makeTest(valueCheckOpConfig); + const results = await harness.runSlice([]); + + expect(results.length).toBe(0); + }); + + it('should correctly set the field in all records with type:data1 and type:data2', async () => { + harness = await makeTest(valueCheckOpConfig); + const results = await harness.runSlice(testData); + + expect(results[0].test_prop).toBe(true); + expect(results[1].test_prop).toBe(true); + }); + + it('should not update fields that do not match conditional_values', async () => { + harness = await makeTest(valueCheckOpConfig); + const results = await harness.runSlice(testData2); + + expect(results[0].test_prop).toBe('value'); + expect(results[1].test_prop).toBe(10); + expect(results[2].test_prop).toBe(100); + }); +}); diff --git a/test/set_field_conditional/schema-spec.ts 
b/test/set_field_conditional/schema-spec.ts new file mode 100644 index 00000000..d71faa08 --- /dev/null +++ b/test/set_field_conditional/schema-spec.ts @@ -0,0 +1,39 @@ +import { WorkerTestHarness } from 'teraslice-test-harness'; +import { OpConfig } from '@terascope/job-components'; + +describe('set_field_conditional schema', () => { + let harness: WorkerTestHarness; + const name = 'set_field_conditional'; + + async function makeSchema(config: Record = {}): Promise { + const opConfig = Object.assign({}, { _op: name }, config); + harness = WorkerTestHarness.testProcessor(opConfig); + + await harness.initialize(); + + const validConfig = harness.executionContext.config.operations.find( + (testConfig) => testConfig._op === name + ); + + return validConfig as OpConfig; + } + + afterEach(async () => { + if (harness) await harness.shutdown(); + }); + + it('should expect to be properly configured', async () => { + await expect(makeSchema({})).toReject(); + await expect(makeSchema({ conditional_field: 1234 })).toReject(); + await expect(makeSchema({ conditional_field: 1234, set_field: 'world' })).toReject(); + await expect(makeSchema({ conditional_field: 'hello', set_field: 1234 })).toReject(); + + await expect(makeSchema({ conditional_field: 'hello', set_field: 'world' })).toResolve(); + await expect(makeSchema({ + conditional_field: 'hello', + set_field: 'world', + conditional_values: [null], + value: true, + })).toResolve(); + }); +}); diff --git a/tsconfig.json b/tsconfig.json index 4f17a111..19370bac 100644 --- a/tsconfig.json +++ b/tsconfig.json @@ -21,7 +21,6 @@ "./node_modules/@types" ] }, - "include": ["test"], "references": [ { "path": "packages/standard-asset-apis" diff --git a/yarn.lock b/yarn.lock index 5bba4343..ceb10259 100644 --- a/yarn.lock +++ b/yarn.lock @@ -366,7 +366,7 @@ dependencies: levn "^0.4.1" -"@faker-js/faker@^9.2.0": +"@faker-js/faker@~9.2.0": version "9.2.0" resolved 
"https://registry.yarnpkg.com/@faker-js/faker/-/faker-9.2.0.tgz#269ee3a5d2442e88e10d984e106028422bcb9551" integrity sha512-ulqQu4KMr1/sTFIYvqSdegHT8NIkt66tFAkugGnHA+1WAfEn6hMzNR+svjXGFRVLnapxvej67Z/LwchFrnLBUg== @@ -764,10 +764,10 @@ resolved "https://registry.yarnpkg.com/@sinclair/typebox/-/typebox-0.27.8.tgz#6667fac16c436b5434a387a34dedb013198f6e6e" integrity sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA== -"@sindresorhus/fnv1a@^2.0.1": - version "2.0.1" - resolved "https://registry.yarnpkg.com/@sindresorhus/fnv1a/-/fnv1a-2.0.1.tgz#2aefdfa7eb5b7f29a7936978218e986c70c603fc" - integrity sha512-suq9tRQ6bkpMukTG5K5z0sPWB7t0zExMzZCdmYm6xTSSIm/yCKNm7VCL36wVeyTsFr597/UhU1OAYdHGMDiHrw== +"@sindresorhus/fnv1a@~3.1.0": + version "3.1.0" + resolved "https://registry.yarnpkg.com/@sindresorhus/fnv1a/-/fnv1a-3.1.0.tgz#f8e46597298f6fd4c12dc901cdd4e73beb4d24fa" + integrity sha512-KV321z5m/0nuAg83W1dPLy85HpHDk7Sdi4fJbwvacWsEhAh+rZUW4ZfGcXmUIvjZg4ss2bcwNlRhJ7GBEUG08w== "@sindresorhus/is@^4.0.0": version "4.6.0" @@ -828,14 +828,14 @@ dependencies: defer-to-connect "^2.0.1" -"@terascope/data-mate@^1.3.2": - version "1.3.2" - resolved "https://registry.yarnpkg.com/@terascope/data-mate/-/data-mate-1.3.2.tgz#cdd19b9b59f164fdbaed65d63190808c03d972d5" - integrity sha512-Xx0dClc0Ghp75UYUXXB++IZfR/JoBIP5YsbT6HqxV4NQuLYZW5CG79XdeUmVzhF0r9tXvqMTPv1VBR+w4LkyXw== +"@terascope/data-mate@^1.4.0", "@terascope/data-mate@~1.4.0": + version "1.4.0" + resolved "https://registry.yarnpkg.com/@terascope/data-mate/-/data-mate-1.4.0.tgz#2fa1175c2c83d468cb994d7bafdf5008aa43fa99" + integrity sha512-dI+E7qxK9XdxAG8sIDGYAFI+XLNHPSsHpfGQhkSAphBFnOZfzgqK2bGam4evNTME3+zMz0fClC1RTpQvhbDbXw== dependencies: - "@terascope/data-types" "^1.3.2" - "@terascope/types" "^1.2.0" - "@terascope/utils" "^1.3.2" + "@terascope/data-types" "^1.4.0" + "@terascope/types" "^1.3.0" + "@terascope/utils" "^1.4.0" "@types/validator" "^13.12.2" awesome-phonenumber "^7.2.0" date-fns 
"^4.1.0" @@ -849,20 +849,30 @@ uuid "^10.0.0" valid-url "^1.0.9" validator "^13.12.0" - xlucene-parser "^1.3.2" + xlucene-parser "^1.4.0" -"@terascope/data-types@^1.3.2": - version "1.3.2" - resolved "https://registry.yarnpkg.com/@terascope/data-types/-/data-types-1.3.2.tgz#1deb01c1b77e479eef057f9e357c8052e5a84e70" - integrity sha512-L5IKhTJ5nlKtd610qhRf1IL9Km0RKnxCFHPeh7HvV2Nu2cWR/hg8lJHGUfX0IbG1ZeMJyA5L5+Odahug9UPW5g== +"@terascope/data-types@^1.4.0": + version "1.4.0" + resolved "https://registry.yarnpkg.com/@terascope/data-types/-/data-types-1.4.0.tgz#2072dd09d1ae7e7da585b12943d75da4b35dcc09" + integrity sha512-tuyKT8BWhEaiX4PYaJQw3RzigfZ/K20RnHTgP9i0M3UNlaJYLYq2t9dPwCdsmUIqzfitBJ/2oUwzA1jBoNPZ5g== dependencies: - "@terascope/types" "^1.2.0" - "@terascope/utils" "^1.3.2" + "@terascope/types" "^1.3.0" + "@terascope/utils" "^1.4.0" graphql "^16.9.0" lodash "^4.17.21" yargs "^17.7.2" -"@terascope/eslint-config@^1.1.0": +"@terascope/elasticsearch-api@^4.4.0": + version "4.4.0" + resolved "https://registry.yarnpkg.com/@terascope/elasticsearch-api/-/elasticsearch-api-4.4.0.tgz#4f9eb25702720a8b0b8c1e8f20d0fd76209c4485" + integrity sha512-Z4ubPscGSVniTI2zcmTfPqAT6xtcNFvmWCLYdVbmcQkA5VJMvyhjj4RPqe9UhQEnH+s/krTRgxIiZOAbC757nA== + dependencies: + "@terascope/types" "^1.3.0" + "@terascope/utils" "^1.4.0" + bluebird "^3.7.2" + setimmediate "^1.0.5" + +"@terascope/eslint-config@~1.1.0": version "1.1.0" resolved "https://registry.yarnpkg.com/@terascope/eslint-config/-/eslint-config-1.1.0.tgz#07a720beacb7efb48dc1d5b01d585832e001df57" integrity sha512-gAfV8x5FPbbCu+pwRgu0b+VJ3VKsLu8l/ziOnO2ATo4zZXhXulQDuPiYoz/ZPem/YdwYgqZDL1Hr2haZerBudQ== @@ -896,13 +906,13 @@ progress "^2.0.3" yargs "^17.2.1" -"@terascope/job-components@^1.5.1": - version "1.5.1" - resolved "https://registry.yarnpkg.com/@terascope/job-components/-/job-components-1.5.1.tgz#a36e8b3a94ed389e16117d7500cdd3bdcfc40f97" - integrity 
sha512-rMHF/mJbv1K5SyJWQ1N+yuK6icntu+kHPyxcdcEvQ9F4Ci4NjJn8zUKdE305Rpdmz2aTx0bDiCXGQNNwVO2sVQ== +"@terascope/job-components@~1.6.0": + version "1.6.0" + resolved "https://registry.yarnpkg.com/@terascope/job-components/-/job-components-1.6.0.tgz#3def9d2f09e777974c923c9ed72c93a59cd2b6b6" + integrity sha512-0UoWLgQROx4CSqtPEBnkb/pbudl9o+vF9iX9N1a+VXORrzHUtnMr7DJZ4YtSZFdwaAeZI7gSvlan9YsEW6xS3g== dependencies: - "@terascope/types" "^1.2.0" - "@terascope/utils" "^1.3.1" + "@terascope/types" "^1.3.0" + "@terascope/utils" "^1.4.0" convict "^6.2.4" convict-format-with-moment "^6.2.0" convict-format-with-validator "^6.2.0" @@ -911,10 +921,10 @@ prom-client "^15.1.3" uuid "^10.0.0" -"@terascope/scripts@^1.4.2": - version "1.4.2" - resolved "https://registry.yarnpkg.com/@terascope/scripts/-/scripts-1.4.2.tgz#1b9efe4c8418d32626760f3118377f54ec24b155" - integrity sha512-h++D7VOe9w+D+k7c4DCitjfybqiJruOAu9yAt1PYZV0B9jvUdd7+DQ6KMk2WjtsML3GkDZJbGe2BQAUWPcORXA== +"@terascope/scripts@~1.5.0": + version "1.5.0" + resolved "https://registry.yarnpkg.com/@terascope/scripts/-/scripts-1.5.0.tgz#474127ce9152c465cc40a8273177146bea880ec8" + integrity sha512-IHrVWsd8qx9yUdMomjVbbjb7d8BUZZjQQv3BeQ57MTza9Ic+Zn2GseNTNATgpaHxRHCz3YZbRsoZ24Q7zjf0Yw== dependencies: "@kubernetes/client-node" "^0.22.0" "@terascope/utils" "^1.3.2" @@ -940,6 +950,14 @@ typedoc-plugin-markdown "~4.0.3" yargs "^17.7.2" +"@terascope/teraslice-state-storage@~1.4.0": + version "1.4.0" + resolved "https://registry.yarnpkg.com/@terascope/teraslice-state-storage/-/teraslice-state-storage-1.4.0.tgz#f256ab881ea0fb7b497bb4f190254014f2153271" + integrity sha512-D15xAngDUwO9vQio5fpNIKgk601PjfBF7zuVMXIIylZCyPQSa7RLUAMeKFISjUyy54PegporXy/wKM7sGC7pWw== + dependencies: + "@terascope/elasticsearch-api" "^4.4.0" + "@terascope/utils" "^1.4.0" + "@terascope/types@^1.2.0": version "1.2.0" resolved "https://registry.yarnpkg.com/@terascope/types/-/types-1.2.0.tgz#cbad101f0153698450d6e99be7bdac88b883b779" @@ -947,7 +965,14 @@ dependencies: 
prom-client "^15.1.3" -"@terascope/utils@^1.3.1", "@terascope/utils@^1.3.2": +"@terascope/types@^1.3.0": + version "1.3.0" + resolved "https://registry.yarnpkg.com/@terascope/types/-/types-1.3.0.tgz#5b890fd84a10cd08e66d9f979b729a8a9d2b1d60" + integrity sha512-Dlz7xK4J2aj86YqMyYBFCBe1JsfVEgXHlCDr8pTr+O/5LEpWhEe256c+2Vq1GR5mDuEfmpDk3PkMNOajlxdsTQ== + dependencies: + prom-client "^15.1.3" + +"@terascope/utils@^1.3.2": version "1.3.2" resolved "https://registry.yarnpkg.com/@terascope/utils/-/utils-1.3.2.tgz#c9f22523a1dc0d578c1c3428303f0f2076edbf69" integrity sha512-HNnZtkwQjOyH907lt4wX1avNtyyE1anXzvbBTc9YuX9SPVbeff92+mjYL+6qWtuv/TiHlKBT77+F1i5eB8WEuA== @@ -989,6 +1014,48 @@ shallow-clone "^3.0.1" validator "^13.12.0" +"@terascope/utils@^1.4.0", "@terascope/utils@~1.4.0": + version "1.4.0" + resolved "https://registry.yarnpkg.com/@terascope/utils/-/utils-1.4.0.tgz#92001274fc8b3becc75b80b3ba5d85d6d932a73c" + integrity sha512-7BnKfzDx0YEW8EdJI4JCYS7rmdI1h1M3CMaiyEsD81BKUF8eyid9z841BlzuqUzjIxTlpRuF5TOddwxsbq4hRg== + dependencies: + "@chainsafe/is-ip" "^2.0.2" + "@terascope/types" "^1.3.0" + "@turf/bbox" "^7.1.0" + "@turf/bbox-polygon" "^7.1.0" + "@turf/boolean-contains" "^7.1.0" + "@turf/boolean-disjoint" "^7.1.0" + "@turf/boolean-equal" "^7.1.0" + "@turf/boolean-intersects" "^7.1.0" + "@turf/boolean-point-in-polygon" "^7.1.0" + "@turf/boolean-within" "^7.1.0" + "@turf/circle" "^7.1.0" + "@turf/helpers" "^7.1.0" + "@turf/invariant" "^7.1.0" + "@turf/line-to-polygon" "^7.1.0" + "@types/lodash-es" "^4.17.12" + "@types/validator" "^13.12.2" + awesome-phonenumber "^7.2.0" + date-fns "^4.1.0" + date-fns-tz "^3.2.0" + datemath-parser "^1.0.6" + debug "^4.3.7" + geo-tz "^8.1.1" + ip-bigint "^8.2.0" + ip-cidr "^4.0.2" + ip6addr "^0.2.5" + ipaddr.js "^2.2.0" + is-cidr "^5.1.0" + is-plain-object "^5.0.0" + js-string-escape "^1.0.1" + kind-of "^6.0.3" + latlon-geohash "^2.0.0" + lodash-es "^4.17.21" + mnemonist "^0.39.8" + p-map "^7.0.2" + shallow-clone "^3.0.1" + validator 
"^13.12.0" + "@tootallnate/once@1": version "1.1.2" resolved "https://registry.yarnpkg.com/@tootallnate/once/-/once-1.1.2.tgz#ccb91445360179a04e7fe6aff78c00ffc1eeaf82" @@ -1258,7 +1325,7 @@ resolved "https://registry.yarnpkg.com/@types/caseless/-/caseless-0.12.5.tgz#db9468cb1b1b5a925b8f34822f1669df0c5472f5" integrity sha512-hWtVTC2q7hc7xZ/RLbxapMvDMgUnDvKvMOpKal4DrMyfGBUfB1oKaZlIRr6mJL+If3bAP6sV/QneGzF6tJjZDg== -"@types/chance@^1.1.4": +"@types/chance@~1.1.4": version "1.1.6" resolved "https://registry.yarnpkg.com/@types/chance/-/chance-1.1.6.tgz#2fe3de58742629602c3fbab468093b27207f04ad" integrity sha512-V+pm3stv1Mvz8fSKJJod6CglNGVqEQ6OyuqitoDkWywEODM/eJd1eSuIp9xt6DrX8BWZ2eDSIzbw1tPCUTvGbQ== @@ -1300,7 +1367,7 @@ "@types/range-parser" "*" "@types/send" "*" -"@types/express@^4.17.19": +"@types/express@~4.17.19": version "4.17.21" resolved "https://registry.yarnpkg.com/@types/express/-/express-4.17.21.tgz#c26d4a151e60efe0084b23dc3369ebc631ed192d" integrity sha512-ejlPM315qwLpaQlQDTjPdsUFSc6ZsP4AN6AlWnogPjQ7CVi7PYF3YVz+CY3jE2pwYf7E/7HlDAN0rV2GxTG0HQ== @@ -1351,7 +1418,7 @@ dependencies: "@types/istanbul-lib-report" "*" -"@types/jest@^29.5.14": +"@types/jest@~29.5.14": version "29.5.14" resolved "https://registry.yarnpkg.com/@types/jest/-/jest-29.5.14.tgz#2b910912fa1d6856cadcd0c1f95af7df1d6049e5" integrity sha512-ZN+4sdnLUbo8EVvVc2ao0GFW6oVrQRPn4K2lglySj7APvSrgzxHiNNK99us4WDMi57xxA2yggblIAMNhXOotLQ== @@ -1369,7 +1436,7 @@ resolved "https://registry.yarnpkg.com/@types/json-schema/-/json-schema-7.0.15.tgz#596a1747233694d50f6ad8a7869fcb6f56cf5841" integrity sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA== -"@types/json2csv@^5.0.7": +"@types/json2csv@~5.0.7": version "5.0.7" resolved "https://registry.yarnpkg.com/@types/json2csv/-/json2csv-5.0.7.tgz#c80ff09b669f8e3ee60be19d91326275a6a2346b" integrity sha512-Ma25zw9G9GEBnX8b12R4EYvnFT6dBh8L3jwsN5EUFXa+fl2dqmbLDbNWN0XuQU3rSXdsbBeCYjI9uHU2PUBxhA== @@ -1396,16 +1463,21 @@ 
"@types/lodash" "*" "@types/lodash@*": - version "4.17.7" - resolved "https://registry.yarnpkg.com/@types/lodash/-/lodash-4.17.7.tgz#2f776bcb53adc9e13b2c0dfd493dfcbd7de43612" - integrity sha512-8wTvZawATi/lsmNu10/j2hk1KEP0IvjubqPE3cu1Xz7xfXXt5oCq3SNUz4fMIP4XGF9Ky+Ue2tBA3hcS7LSBlA== + version "4.17.13" + resolved "https://registry.yarnpkg.com/@types/lodash/-/lodash-4.17.13.tgz#786e2d67cfd95e32862143abe7463a7f90c300eb" + integrity sha512-lfx+dftrEZcdBPczf9d0Qv0x+j/rfNCMuC6OcfXmO8gkfeNAY88PgKUbvG56whcN23gc27yenwF6oJZXGFpYxg== "@types/mime@^1": version "1.3.5" resolved "https://registry.yarnpkg.com/@types/mime/-/mime-1.3.5.tgz#1ef302e01cf7d2b5a0fa526790c9123bf1d06690" integrity sha512-/pyBZWSLD2n0dcHE3hq8s8ZvcETHtEuF+3E7XVt0Ig2nvsVQXdghHVcEkIWjy9A0wKfTn97a/PSDYohKIlnP/w== -"@types/node-gzip@^1.1.0": +"@types/ms@^0.7.34": + version "0.7.34" + resolved "https://registry.yarnpkg.com/@types/ms/-/ms-0.7.34.tgz#10964ba0dee6ac4cd462e2795b6bebd407303433" + integrity sha512-nG96G3Wp6acyAgJqGasjODb+acrI7KltPiRxzHPXnP3NgI28bpQDRv53olbqGXbfcgF5aiiHmO3xpwEpS5Ld9g== + +"@types/node-gzip@~1.1.0": version "1.1.3" resolved "https://registry.yarnpkg.com/@types/node-gzip/-/node-gzip-1.1.3.tgz#cc9c78ecc58b2e19aa9fe6f6a95fd8a36062cb27" integrity sha512-gonKbqhKCTrnTpgM5VoVIILYF6odOS4nN2xaIkOUq8ckdrbD3PyF6h5SHIM23eHK/Q1dpHAQsWk5v2WUW7q14Q== @@ -1419,13 +1491,20 @@ dependencies: undici-types "~6.19.2" -"@types/node@^22.0.0", "@types/node@^22.7.4": +"@types/node@^22.0.0": version "22.7.4" resolved "https://registry.yarnpkg.com/@types/node/-/node-22.7.4.tgz#e35d6f48dca3255ce44256ddc05dee1c23353fcc" integrity sha512-y+NPi1rFzDs1NdQHHToqeiX2TIS79SWEAw9GYhkkx8bD0ChpfqC+n2j5OXOCpzfojBEBt6DnEnnG9MY0zk1XLg== dependencies: undici-types "~6.19.2" +"@types/node@~22.9.0": + version "22.9.0" + resolved "https://registry.yarnpkg.com/@types/node/-/node-22.9.0.tgz#b7f16e5c3384788542c72dc3d561a7ceae2c0365" + integrity 
sha512-vuyHg81vvWA1Z1ELfvLko2c8f34gyA0zaic0+Rllc5lbCnbSyuvb2Oxpm6TAUAC/2xZN3QGqxBNggD1nNR2AfQ== + dependencies: + undici-types "~6.19.8" + "@types/qs@*": version "6.9.15" resolved "https://registry.yarnpkg.com/@types/qs/-/qs-6.9.15.tgz#adde8a060ec9c305a82de1babc1056e73bd64dce" @@ -1480,7 +1559,7 @@ resolved "https://registry.yarnpkg.com/@types/stack-utils/-/stack-utils-2.0.3.tgz#6209321eb2c1712a7e7466422b8cb1fc0d9dd5d8" integrity sha512-9aEbYZ3TbYMznPdcdr3SmIrLXwC/AKZXQeCf9Pgao5CKb8CyHuEX5jzWPTkvregvhRJHcpRO6BFoGW9ycaOkYw== -"@types/timsort@^0.3.0": +"@types/timsort@~0.3.0": version "0.3.3" resolved "https://registry.yarnpkg.com/@types/timsort/-/timsort-0.3.3.tgz#c0fbc55340b01f3426530a79a7fbc81567f56029" integrity sha512-2pT/gAirKjCe0L1PCvO4ArPhYgyVfjCuKoFhU6NIIvN2+HzwJXQvakww1nzkJrujI8/R43/bfls+AErF+6xrIg== @@ -2069,6 +2148,11 @@ bl@^1.0.0: readable-stream "^2.3.5" safe-buffer "^5.1.1" +bluebird@^3.7.2: + version "3.7.2" + resolved "https://registry.yarnpkg.com/bluebird/-/bluebird-3.7.2.tgz#9f229c15be272454ffa973ace0dbee79a1b0c36f" + integrity sha512-XpNj6GDQzdfW+r2Wnn7xiSAd7TM3jzkxGXBGTtWKuSXv1xUV+azxAm8jdWZN06QTQk+2N2XB9jRDkvbmQmcRtg== + body-parser@1.20.3: version "1.20.3" resolved "https://registry.yarnpkg.com/body-parser/-/body-parser-1.20.3.tgz#1953431221c6fb5cd63c4b36d53fab0928e548c6" @@ -2268,7 +2352,7 @@ chalk@^4.0.0, chalk@^4.0.2: ansi-styles "^4.1.0" supports-color "^7.1.0" -chance@^1.1.12: +chance@~1.1.12: version "1.1.12" resolved "https://registry.yarnpkg.com/chance/-/chance-1.1.12.tgz#6a263cf241674af50a1b903357f9d328a6f252fb" integrity sha512-vVBIGQVnwtUG+SYe0ge+3MvF78cvSpuCOEUJr7sVEk2vSBuMW6OXNJjSzdtzrlxNUEaoqH2GBd5Y/+18BEB01Q== @@ -3109,7 +3193,7 @@ eslint-visitor-keys@^4.0.0, eslint-visitor-keys@^4.2.0: resolved "https://registry.yarnpkg.com/eslint-visitor-keys/-/eslint-visitor-keys-4.2.0.tgz#687bacb2af884fcdda8a6e7d65c606f46a14cd45" integrity sha512-UyLnSehNt62FFhSwjZlHmeokpRK59rcz29j+F1/aDgbkbRTk7wIc9XzdoasMUbRNKDM0qQt/+BJ4BrpFeABemw== 
-eslint@^9.10.0, eslint@^9.14.0: +eslint@^9.10.0, eslint@~9.14.0: version "9.14.0" resolved "https://registry.yarnpkg.com/eslint/-/eslint-9.14.0.tgz#534180a97c00af08bcf2b60b0ebf0c4d6c1b2c95" integrity sha512-c2FHsVBr87lnUtjP4Yhvk4yEhKrQavGafRA/Se1ouse8PfbfC/Qh9Mxa00yWsZRlqeUB9raXip0aiiUZkgnr9g== @@ -3247,7 +3331,7 @@ expect@^29.0.0, expect@^29.7.0: jest-message-util "^29.7.0" jest-util "^29.7.0" -express@^4.21.1: +express@~4.21.1: version "4.21.1" resolved "https://registry.yarnpkg.com/express/-/express-4.21.1.tgz#9dae5dda832f16b4eec941a4e44aa89ec481b281" integrity sha512-YSFlK1Ee0/GC8QaO91tHcDxJiE/X4FbpAyQWkxAvG6AXCuR65YzK8ua6D9hvi/TzUfZMpc+BwuM1IPw8fmQBiQ== @@ -3587,9 +3671,9 @@ gensync@^1.0.0-beta.2: integrity sha512-3hN7NaskYvMDLQY55gnW3NQ+mesEAepTqlg+VEbj7zzqEMBVNhzcGYYeqFo/TlYz6eQiFcp1HcsCZO+nGgS8zg== geo-tz@^8.1.1: - version "8.1.1" - resolved "https://registry.yarnpkg.com/geo-tz/-/geo-tz-8.1.1.tgz#0173ee2f4c27a8198393244b1d21f908a36d8f74" - integrity sha512-V6FEJ9UQOHnBD7eAOkJG3gZlc0LjKckRjt56cit6MLKMPF2qQIUBDYb0iUj4kw8l+WUeAu9ytPdSPpcLEELWtw== + version "8.1.2" + resolved "https://registry.yarnpkg.com/geo-tz/-/geo-tz-8.1.2.tgz#2c3087758acf6ff67630ce096ebc9ac4df681ff1" + integrity sha512-S1udoP7MZ+CVu+7Iy/VayVNmEHTWgfJ52TjpfC2/4f+j0SB/ZXMjGrwZTqPMo6/O2m5lrGLCFCY0bkxUqiLN+g== dependencies: "@turf/boolean-point-in-polygon" "^7.1.0" "@turf/helpers" "^7.1.0" @@ -4588,7 +4672,7 @@ jest-environment-node@^29.7.0: jest-mock "^29.7.0" jest-util "^29.7.0" -jest-extended@^4.0.2: +jest-extended@~4.0.2: version "4.0.2" resolved "https://registry.yarnpkg.com/jest-extended/-/jest-extended-4.0.2.tgz#d23b52e687cedf66694e6b2d77f65e211e99e021" integrity sha512-FH7aaPgtGYHc9mRjriS0ZEHYM5/W69tLrFTIdzm+yJgeoCmmrSB/luSfMSqWP9O29QWHPEmJ4qmU6EwsZideog== @@ -4596,7 +4680,7 @@ jest-extended@^4.0.2: jest-diff "^29.0.0" jest-get-type "^29.0.0" -jest-fixtures@^0.6.0: +jest-fixtures@~0.6.0: version "0.6.0" resolved 
"https://registry.yarnpkg.com/jest-fixtures/-/jest-fixtures-0.6.0.tgz#7a58475aa7f404d84c9b72d324ed0b285ba6f3ae" integrity sha512-ugqOq1HnJYgFGfmK8cc2jQbjcw4g00KqJNZfajTRZlYjnRschnmYuMrsb20aG74pg8R+zh6q72P3yPG7SnPMfA== @@ -4835,7 +4919,7 @@ jest-worker@^29.7.0: merge-stream "^2.0.0" supports-color "^8.0.0" -jest@^29.7.0: +jest@~29.7.0: version "29.7.0" resolved "https://registry.yarnpkg.com/jest/-/jest-29.7.0.tgz#994676fc24177f088f1c5e3737f5697204ff2613" integrity sha512-NIy3oAFp9shda19hy4HK0HRTWKtPJmGdnvywu01nOqNC2vZg+Z+fvJDxpMQA88eb2I9EcafcdjYgsDthnYTvGw== @@ -5333,7 +5417,7 @@ mnemonist@^0.39.8: dependencies: obliterator "^2.0.1" -mocker-data-generator@^3.0.3: +mocker-data-generator@~3.0.3: version "3.0.3" resolved "https://registry.yarnpkg.com/mocker-data-generator/-/mocker-data-generator-3.0.3.tgz#898323f042404d60a56c036919ce65533cf9e078" integrity sha512-QQDEq/EXPh4MLLbEz9G2qQjZzrB19J98cIqQBcZ5sknLdjOjgs7xenxShv70wTjpryPtoy8+Q0012uPmmflCog== @@ -5385,7 +5469,7 @@ node-int64@^0.4.0: resolved "https://registry.yarnpkg.com/node-int64/-/node-int64-0.4.0.tgz#87a9065cdb355d3182d8f94ce11188b825c68a3b" integrity sha512-O5lz91xSOeoXP6DulyHfllpq+Eg00MWitZIbtPfoSEvqIHdl5gfcY6hYzDWnj0qD5tz52PI08u9qUvSVeUBeHw== -node-notifier@^10.0.1: +node-notifier@~10.0.1: version "10.0.1" resolved "https://registry.yarnpkg.com/node-notifier/-/node-notifier-10.0.1.tgz#0e82014a15a8456c4cfcdb25858750399ae5f1c7" integrity sha512-YX7TSyDukOZ0g+gmzjB6abKu+hTGvO8+8+gIFDsRCU2t8fLV/P2unmt+LGFaIa4y64aX98Qksa97rgz4vMNeLQ== @@ -5885,7 +5969,7 @@ progress@^2.0.3: resolved "https://registry.yarnpkg.com/progress/-/progress-2.0.3.tgz#7e8cf8d8f5b8f239c1bc68beb4eb78567d572ef8" integrity sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA== -prom-client@^15.1.2, prom-client@^15.1.3: +prom-client@^15.1.3, prom-client@~15.1.2: version "15.1.3" resolved "https://registry.yarnpkg.com/prom-client/-/prom-client-15.1.3.tgz#69fa8de93a88bc9783173db5f758dc1c69fa8fc2" 
integrity sha512-6ZiOBfCywsD4k1BN9IX0uZhF+tJkV8q8llP64G5Hajs4JOeVLPCwpPVcpXy3BwYiUGgyJzsJJQeOIv7+hDSq8g== @@ -5978,7 +6062,7 @@ quick-lru@^5.1.1: resolved "https://registry.yarnpkg.com/quick-lru/-/quick-lru-5.1.1.tgz#366493e6b3e42a3a6885e2e99d18f80fb7a8c932" integrity sha512-WuyALRjWPDGtt/wzJiadO5AXY+8hZ80hVpe6MyivgraREW751X3SbhRvG3eLKOYN+8VEvqLcf3wdnt44Z4S4SA== -randexp@^0.5.3: +randexp@~0.5.3: version "0.5.3" resolved "https://registry.yarnpkg.com/randexp/-/randexp-0.5.3.tgz#f31c2de3148b30bdeb84b7c3f59b0ebb9fec3738" integrity sha512-U+5l2KrcMNOUPYvazA3h5ekF80FHTUG+87SEAmHZmolh1M+i/WyTCxVzmi+tidIa1tM4BSe8g2Y/D3loWDjj+w== @@ -6325,6 +6409,11 @@ set-function-name@^2.0.1, set-function-name@^2.0.2: functions-have-names "^1.2.3" has-property-descriptors "^1.0.2" +setimmediate@^1.0.5: + version "1.0.5" + resolved "https://registry.yarnpkg.com/setimmediate/-/setimmediate-1.0.5.tgz#290cbb232e306942d7d7ea9b83732ab7856f8285" + integrity sha512-MATJdZp8sLqDl/68LfQmbP8zKPLQNV6BIZoIgrscFDQ+RsvK/BxeDQOgyxKKoh0y/8h3BqVFnCqQ/gd+reiIXA== + setprototypeof@1.2.0: version "1.2.0" resolved "https://registry.yarnpkg.com/setprototypeof/-/setprototypeof-1.2.0.tgz#66c9a24a73f9fc28cbe66b09fed3d33dcaf1b424" @@ -6376,7 +6465,7 @@ shiki@^0.14.7: vscode-oniguruma "^1.7.0" vscode-textmate "^8.0.0" -short-unique-id@^5.2.0: +short-unique-id@~5.2.0: version "5.2.0" resolved "https://registry.yarnpkg.com/short-unique-id/-/short-unique-id-5.2.0.tgz#a7e0668e0a8998d3151f27a36cf046055b1f270b" integrity sha512-cMGfwNyfDZ/nzJ2k2M+ClthBIh//GlZl1JEf47Uoa9XR11bz8Pa2T2wQO4bVrRdH48LrIDWJahQziKo3MjhsWg== @@ -6541,16 +6630,7 @@ string-length@^4.0.1: char-regex "^1.0.2" strip-ansi "^6.0.0" -"string-width-cjs@npm:string-width@^4.2.0": - version "4.2.3" - resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010" - integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g== - dependencies: - emoji-regex 
"^8.0.0" - is-fullwidth-code-point "^3.0.0" - strip-ansi "^6.0.1" - -string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.3: +"string-width-cjs@npm:string-width@^4.2.0", string-width@^4.1.0, string-width@^4.2.0, string-width@^4.2.3: version "4.2.3" resolved "https://registry.yarnpkg.com/string-width/-/string-width-4.2.3.tgz#269c7117d27b05ad2e536830a8ec895ef9c6d010" integrity sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g== @@ -6644,14 +6724,7 @@ string_decoder@~1.1.1: dependencies: safe-buffer "~5.1.0" -"strip-ansi-cjs@npm:strip-ansi@^6.0.1": - version "6.0.1" - resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9" - integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A== - dependencies: - ansi-regex "^5.0.1" - -strip-ansi@^6.0.0, strip-ansi@^6.0.1: +"strip-ansi-cjs@npm:strip-ansi@^6.0.1", strip-ansi@^6.0.0, strip-ansi@^6.0.1: version "6.0.1" resolved "https://registry.yarnpkg.com/strip-ansi/-/strip-ansi-6.0.1.tgz#9e26c63d30f53443e9489495b2105d37b67a85d9" integrity sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A== @@ -6783,7 +6856,7 @@ teeny-request@7.1.1: stream-events "^1.0.5" uuid "^8.0.0" -teraslice-test-harness@^1.2.0: +teraslice-test-harness@~1.2.0: version "1.2.0" resolved "https://registry.yarnpkg.com/teraslice-test-harness/-/teraslice-test-harness-1.2.0.tgz#7d284cfe2b52488de9834245a168b21709a838fa" integrity sha512-lbkQs0ZkARg5K5+/NJxWL5CKReibs0Yp0MINnvIDPqhK4NuRkf3dBMX1C+ofRFj8p0yxlc1mtqao0pNNQ8PXbw== @@ -6816,7 +6889,7 @@ through@^2.3.8: resolved "https://registry.yarnpkg.com/through/-/through-2.3.8.tgz#0dd4c9ffaabc357960b1b724115d7e0e86a2e1f5" integrity sha512-w89qg7PI8wAdvX60bMDP+bFoD5Dvhm9oLheFp5O4a2QF0cSBGsBX4qZmadPMvVqlLJBBci+WqGGOAPvcDeNSVg== -timsort@^0.3.0: +timsort@~0.3.0: version "0.3.0" resolved 
"https://registry.yarnpkg.com/timsort/-/timsort-0.3.0.tgz#405411a8e7e6339fe64db9a234de11dc31e02bd4" integrity sha512-qsdtZH+vMoCARQtyod4imc2nIJwg9Cc7lPRrw9CzF8ZKR0khdr8+2nX80PBhET3tcyTtJDxAffGh2rXH4tyU8A== @@ -6876,7 +6949,7 @@ ts-api-utils@^1.3.0: resolved "https://registry.yarnpkg.com/ts-api-utils/-/ts-api-utils-1.3.0.tgz#4b490e27129f1e8e686b45cc4ab63714dc60eea1" integrity sha512-UQMIo7pb8WRomKR1/+MFVLTroIvDVtMX3K6OUir8ynLyzB8Jeriont2bTAtmNPa1ekAgN7YPDyf6V+ygrdU+eQ== -ts-jest@^29.2.5: +ts-jest@~29.2.5: version "29.2.5" resolved "https://registry.yarnpkg.com/ts-jest/-/ts-jest-29.2.5.tgz#591a3c108e1f5ebd013d3152142cb5472b399d63" integrity sha512-KD8zB2aAZrcKIdGk4OwpJggeLcH1FgrICqDSROWqlnJXGCXK4Mn6FcdK2B6670Xr73lHMG1kHw8R87A0ecZ+vA== @@ -6907,14 +6980,14 @@ ts-pegjs@^4.2.1: prettier "^2.8.8" ts-morph "^18.0.0" -ts-transforms@^1.3.2: - version "1.3.2" - resolved "https://registry.yarnpkg.com/ts-transforms/-/ts-transforms-1.3.2.tgz#905d6f191b8e1172d9bee5658874159804873753" - integrity sha512-k8GrD8kGDDPsI8kxRyE2ypn7d/XAPHNgrfLA5QD2KLcAi9olSKXYojv7VrdhigM6XU91CpvInFBonaqQBGBOcg== +ts-transforms@~1.4.0: + version "1.4.0" + resolved "https://registry.yarnpkg.com/ts-transforms/-/ts-transforms-1.4.0.tgz#f4708f2803127183aeeea68b012a5927f6e2088a" + integrity sha512-40G++/PYXouGNf/jj43S78tIn6Lt7zjrWzDXUFQ8Ax2urFQT4gkrZKHrsxdHhBvtIatPGOLCz23WbCaGGGQ1uw== dependencies: - "@terascope/data-mate" "^1.3.2" - "@terascope/types" "^1.2.0" - "@terascope/utils" "^1.3.2" + "@terascope/data-mate" "^1.4.0" + "@terascope/types" "^1.3.0" + "@terascope/utils" "^1.4.0" awesome-phonenumber "^7.2.0" graphlib "^2.1.8" jexl "^2.2.2" @@ -6938,7 +7011,7 @@ tslib@^1.8.1: resolved "https://registry.yarnpkg.com/tslib/-/tslib-1.14.1.tgz#cf2d38bdc34a134bcaf1091c41f6619e2f672d00" integrity sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg== -tslib@^2.4.1, tslib@^2.6.2, tslib@^2.8.1: +tslib@^2.4.1, tslib@^2.6.2, tslib@~2.8.1: version "2.8.1" resolved 
"https://registry.yarnpkg.com/tslib/-/tslib-2.8.1.tgz#612efe4ed235d567e8aba5f2a5fab70280ade83f" integrity sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w== @@ -7095,7 +7168,7 @@ unbzip2-stream@^1.0.9: buffer "^5.2.1" through "^2.3.8" -undici-types@~6.19.2: +undici-types@~6.19.2, undici-types@~6.19.8: version "6.19.8" resolved "https://registry.yarnpkg.com/undici-types/-/undici-types-6.19.8.tgz#35111c9d1437ab83a7cdc0abae2f26d88eda0a02" integrity sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw== @@ -7297,16 +7370,7 @@ word-wrap@^1.2.5: resolved "https://registry.yarnpkg.com/word-wrap/-/word-wrap-1.2.5.tgz#d2c45c6dd4fbce621a66f136cbe328afd0410b34" integrity sha512-BN22B5eaMMI9UMtjrGd5g5eCYPpCPDUy0FJXbYsaT5zYxjFOckS53SQDE3pWkVoWpHXVb3BrYcEN4Twa55B5cA== -"wrap-ansi-cjs@npm:wrap-ansi@^7.0.0": - version "7.0.0" - resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43" - integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q== - dependencies: - ansi-styles "^4.0.0" - string-width "^4.1.0" - strip-ansi "^6.0.0" - -wrap-ansi@^7.0.0: +"wrap-ansi-cjs@npm:wrap-ansi@^7.0.0", wrap-ansi@^7.0.0: version "7.0.0" resolved "https://registry.yarnpkg.com/wrap-ansi/-/wrap-ansi-7.0.0.tgz#67e145cff510a6a6984bdf1152911d69d2eb9e43" integrity sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q== @@ -7342,13 +7406,13 @@ ws@^8.11.0: resolved "https://registry.yarnpkg.com/ws/-/ws-8.18.0.tgz#0d7505a6eafe2b0e712d232b42279f53bc289bbc" integrity sha512-8VbfWfHLbbwu3+N6OKsOMpBdT4kXPDDB9cJk2bJ6mh9ucxdlnNvH1e+roYkKmN9Nxw2yjz7VzeO9oOz2zJ04Pw== -xlucene-parser@^1.3.2: - version "1.3.2" - resolved "https://registry.yarnpkg.com/xlucene-parser/-/xlucene-parser-1.3.2.tgz#2fc319bb43b6c2d36dea6425cbacbec149f1dccc" - integrity 
sha512-7h3mxmfgxrRFyJGU4yCo0pRVkAC37xueyCKXQ7J0ZbZXrk7W/7arfMMG/ZQ01gtldL6vjdO1j+Rnafe55mM03w== +xlucene-parser@^1.4.0: + version "1.4.0" + resolved "https://registry.yarnpkg.com/xlucene-parser/-/xlucene-parser-1.4.0.tgz#5bdf160233b0ae45b4dbfe645a7592f4fd6b5e69" + integrity sha512-vHtBjZFuPBUK2SeXoKTIRHO7+y4+SyoYxhK8C4uZY7sVBIHRBdn/gyHsN7ENcnt80N7wn9dop8E+7S8Grgywkw== dependencies: - "@terascope/types" "^1.2.0" - "@terascope/utils" "^1.3.2" + "@terascope/types" "^1.3.0" + "@terascope/utils" "^1.4.0" peggy "~4.1.1" ts-pegjs "^4.2.1"