This processor encapsulates the transform module of ts-transforms.
To use this processor, it requires a file containing all transform rules that is will be preforming and any necessary plugins that those rules require to operate.
Below contains an example of the transform rules, plugins and configurations to execute ts-transforms.
Example Job
{
"name" : "testing",
"workers" : 1,
"slicers" : 1,
"lifecycle" : "once",
"assets" : [
"standard"
],
"operations" : [
{
"_op": "test-reader"
},
{
"_op": "transform",
"plugins": ["someAssetId:plugins"],
"rules": ["someAssetId:transformRules.txt"],
"type_config": {
"some": "string",
"field": "string",
"field2": "string",
"date": "date",
"location": "geo-point"
},
"variables": {
"foo": "data"
}
}
]
}
Example rules located at someAssetId:transformRules.txt
{ "selector": "some: $foo", "source_field": "field", "target_field": "interm1", "tag": "someTag", "output": false }
{ "selector": "some: $foo", "source_field": "field2", "target_field": "interm2", "tag": "someTag", "output": false }
{ "follow": "someTag", "post_process": "join", "target_field": "final", "delimiter": " " }
{ "source_field": "otherField", "target_field": "lastField", "other_match_required": true }
{ "source_field": "_key", "target_field": "id", "other_match_required": true, "tag": "numberMe" }
{ "follow": "numberMe", "post_process": "number" }
{ "selector": "location: geoBox( top_left: '33.906320, -112.758421' bottom_right: '32.813646,-111.058902')", "source_field": "location", "target_field": "loc" }
{ "selector": "date:[2019-04-16T20:14:44.304Z TO *] AND bytes:>=1000000", "source_field": "date", "target_field": "last_seen", "tag": "tagOp" }
{ "follow": "tagOp", "post_process": "tag" }
Example plugin used by rules above located at someAssetId:plugins
import { DataEntity } from '@terascope/job-components';
class Tag {
static cardinality = 'one-to-one';
constructor(operationConfig) {
this.operationConfig = operationConfig;
}
run(doc: DataEntity) {
doc.wasTagged = true;
return doc;
}
}
export default class Plugin {
init() {
return {
tag: Tag,
};
}
}
Example of the data and the expected results
const data = DataEntity.makeArray([
{ some: 'data', field: 'hello', field2: 'world', _key: '1' },
{ location: '33.435967, -111.867710', _key: '2' },
{ date, bytes: '1200000', _key: '3' },
{ other: 'stuff', _key: '4' }
]);
const results = await process.run(data);
results === [
{ final: 'hello world', id: 1 ,
{ loc: '33.435967, -111.867710', id: 2 },
{ last_seen: date, id: 3, wasTagged: true }
];
Configuration | Description | Type | Notes |
---|---|---|---|
_op | Name of operation, it must reflect the exact name of the file | String | required |
rules | an array of strings that are the locations where rule files. must be specified in "assetName:path" format | String[] | required |
plugins | an array of strings that are the locations where plugins reside. must be specified in "assetName:modulePath" format | Object[] | optional, defaults to [] |
type_config | a schema for the data being consumed. Set the keys to your data field names, with values set to this enum | Object | optional |
variables | An object containing any variables for the xlucene rules | Object | optional |