-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #931 from terascope/merge-common-asset
migrate processors from common assets
- Loading branch information
Showing
92 changed files
with
4,769 additions
and
222 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
{ | ||
"name": "standard", | ||
"version": "1.1.0", | ||
"version": "1.2.0", | ||
"description": "Teraslice standard processor asset bundle" | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
import { OpConfig } from '@terascope/types'; | ||
|
||
/**
 * Operation configuration shape with three settings: `source`,
 * `destination` and `delete_source`.
 *
 * NOTE(review): the processor that consumes this config is not visible
 * here; the per-field notes below are inferred from the names and should be
 * confirmed against that processor and its schema.
 */
export interface CopyFieldConfig extends OpConfig { | ||
/** Presumably the name of the record field to copy from - confirm. */
source: string; | ||
/** Presumably the name of the record field to copy to - confirm. */
destination: string; | ||
/** Presumably whether `source` is removed after the copy - confirm. */
delete_source: boolean; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
import { OpConfig } from '@terascope/types'; | ||
|
||
/**
 * Operation configuration for the `CopyMetadataField` processor.
 */
export interface CopyMetadataFieldConfig extends OpConfig { | ||
/** Record property that receives the metadata value. */
destination: string; | ||
/** Metadata key whose value is read via `getMetadata` and copied. */
meta_key: string; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
import { MapProcessor, DataEntity } from '@terascope/job-components'; | ||
import { CopyMetadataFieldConfig } from './interfaces.js'; | ||
|
||
/**
 * Map processor that copies a single DataEntity metadata value onto the
 * record itself.
 *
 * The metadata key to read comes from `opConfig.meta_key`, and the record
 * property that receives the value comes from `opConfig.destination`.
 */
export default class CopyMetadataField extends MapProcessor<CopyMetadataFieldConfig> {
    /**
     * Reads the configured metadata value from `doc`, assigns it to the
     * configured destination property, and returns the same (mutated) entity.
     */
    map(doc: DataEntity) {
        const { destination, meta_key: metaKey } = this.opConfig;
        const metaValue = doc.getMetadata(metaKey);

        doc[destination] = metaValue;
        return doc;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
import { ConvictSchema } from '@terascope/job-components'; | ||
import { CopyMetadataFieldConfig } from './interfaces.js'; | ||
|
||
/**
 * Configuration schema for the copy-metadata-field operation.
 *
 * Declares two settings, both using the `required_String` format:
 * `destination` (default `null`) and `meta_key` (default `'_key'`).
 */
export default class Schema extends ConvictSchema<CopyMetadataFieldConfig> {
    /** Returns the schema definition object for this operation. */
    build() {
        const destination = {
            doc: 'The property to copy to',
            format: 'required_String',
            default: null
        };

        const metaKey = {
            doc: 'The Dataentity metadata key to copy',
            format: 'required_String',
            default: '_key'
        };

        return { destination, meta_key: metaKey };
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
import { OpConfig } from '@terascope/types'; | ||
|
||
/**
 * Operation configuration for the `CountUnique` processor.
 */
export interface CountUniqueConfig extends OpConfig { | ||
/** Fields whose non-null values are copied from each input record onto its count result. */
preserve_fields: string[]; | ||
/** Name of the field (or metadata key) whose value identifies what is counted. */
field: string; | ||
/** When true, `field` is read with `getMetadata`; otherwise it is read from the record itself. */
is_meta_field: boolean; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
import { BatchProcessor, DataEntity, has } from '@terascope/job-components'; | ||
import { CountUniqueConfig } from './interfaces.js'; | ||
|
||
/**
 * Batch processor that collapses a batch into one result record per distinct
 * identifier value, counting how many input records carried that value.
 *
 * The identifier is read from `opConfig.field`, either from the record's
 * metadata (`opConfig.is_meta_field` is true) or from the record itself.
 */
export default class CountUnique extends BatchProcessor<CountUniqueConfig> {
    /**
     * Counts the records in `dataArray` by identifier.
     *
     * @param dataArray the batch of records to count
     * @returns one DataEntity per distinct identifier, holding `count`, `_key`
     * and the last non-null value seen for every `opConfig.preserve_fields` entry
     */
    async onBatch(dataArray: DataEntity[]) {
        // A null-prototype object is used as the accumulator so identifier
        // values that match inherited Object.prototype members (for example
        // "__proto__" or "constructor") are stored and counted like any other
        // key instead of colliding with the prototype chain.
        const results: Record<string, DataEntity> = Object.create(null);

        for (const doc of dataArray) {
            const key = this._getIdentifier(doc);

            if (!has(results, key)) {
                results[key] = DataEntity.make({
                    count: 0,
                    _key: key
                }, { _key: key });
            }

            results[key].count++;

            // Later records overwrite earlier ones, so the result keeps the
            // last non-null value seen for each preserved field.
            this.opConfig.preserve_fields.forEach((field) => {
                if (doc[field] != null) {
                    results[key][field] = doc[field];
                }
            });
        }

        return Object.values(results);
    }

    /**
     * Returns the value used to group `doc`: the metadata value for
     * `opConfig.field` when `opConfig.is_meta_field` is set, otherwise the
     * record property of that name.
     */
    private _getIdentifier(doc: DataEntity): any {
        if (this.opConfig.is_meta_field) {
            return doc.getMetadata(this.opConfig.field);
        }

        return doc[this.opConfig.field];
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
import { ConvictSchema, isString } from '@terascope/job-components'; | ||
import { CountUniqueConfig } from './interfaces.js'; | ||
|
||
/**
 * Configuration schema for the count-unique operation.
 *
 * Declares `preserve_fields` (array of strings, default `[]`), `field`
 * (`required_String`, default `'_key'`) and `is_meta_field` (boolean,
 * default `true`).
 */
export default class Schema extends ConvictSchema<CountUniqueConfig> {
    /** Returns the schema definition object for this operation. */
    build() {
        // Validator for `preserve_fields`: throws unless the value is an
        // array in which every element is a string.
        const assertStringArray = (input: unknown) => {
            const isStringArray = Array.isArray(input)
                && input.every((val) => isString(val));

            if (!isStringArray) {
                throw new Error('Parameter "preserve_fields" must be an array of strings');
            }
        };

        return {
            preserve_fields: {
                doc: 'A list of fields whose last seen values are added to the result in addition to the count',
                default: [],
                format: assertStringArray
            },
            field: {
                doc: 'Field that is counted, defaults to metadata _key',
                default: '_key',
                format: 'required_String'
            },
            is_meta_field: {
                doc: 'determines if the field to count on lives as a DataEntity meta field or on the record itself',
                default: true,
                format: Boolean
            }
        };
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
import { OpConfig } from '@terascope/types'; | ||
|
||
/**
 * One entry of `FilterConfig.exception_rules`.
 *
 * NOTE(review): the code that evaluates these rules is not visible here;
 * the per-field notes are inferred from the names - confirm against the
 * filter processor.
 */
export interface ExceptionRule { | ||
/** Presumably the record field the rule inspects - confirm. */
field: string; | ||
/** Presumably the value (or pattern) the field is compared with - confirm. */
value: any; | ||
/** Optional flag; presumably marks `value` as a regular expression - confirm. */
regex?: boolean; | ||
} | ||
|
||
/**
 * Operation configuration shape for a filter operation.
 *
 * NOTE(review): neither the filter processor nor its schema is visible
 * here, so the meaning, defaults and allowed values of these settings must
 * be confirmed there; the notes below state only what the types show.
 */
export interface FilterConfig extends OpConfig { | ||
/** A single field name or a list of field names. */
field: string | string []; | ||
/** Optional, untyped value - presumably what the field(s) are compared against; confirm. */
value?: any; | ||
invert: boolean; | ||
array_index: number; | ||
/** Presumably selects the comparison strategy - confirm allowed values. */
filter_by: string; | ||
validation_function?: string; | ||
validation_function_args?: any; | ||
filtered_to_dead_letter_queue: boolean; | ||
/** Optional list of `ExceptionRule` entries. */
exception_rules?: ExceptionRule[]; | ||
} |
Oops, something went wrong.