diff --git a/manifests/plugins/regex/failure-missing-input-param.yml b/manifests/plugins/regex/failure-missing-input-param.yml
index 8015e47ea..8ed278e41 100644
--- a/manifests/plugins/regex/failure-missing-input-param.yml
+++ b/manifests/plugins/regex/failure-missing-input-param.yml
@@ -6,7 +6,7 @@ initialize:
   plugins:
     regex:
       method: Regex
-      path: "@grnsft/if-plugins"
+      path: "builtin"
       global-config:
         parameter: physical-processor
         match: ^(.*),
diff --git a/manifests/plugins/regex/failure-not-matching-with-regex.yml b/manifests/plugins/regex/failure-not-matching-with-regex.yml
index 0db0a9116..84189dcde 100644
--- a/manifests/plugins/regex/failure-not-matching-with-regex.yml
+++ b/manifests/plugins/regex/failure-not-matching-with-regex.yml
@@ -6,7 +6,7 @@ initialize:
   plugins:
     regex:
       method: Regex
-      path: "@grnsft/if-plugins"
+      path: "builtin"
       global-config:
         parameter: physical-processor
         match: ^
diff --git a/manifests/plugins/regex/success.yml b/manifests/plugins/regex/success.yml
index bb01b4b93..0fa735fa6 100644
--- a/manifests/plugins/regex/success.yml
+++ b/manifests/plugins/regex/success.yml
@@ -2,11 +2,11 @@ name: regex
 description: successful path
 tags:
 initialize:
-  outputs: ['yaml']
+  # outputs: ['yaml']
   plugins:
     regex:
       method: Regex
-      path: "@grnsft/if-plugins"
+      path: "builtin"
       global-config:
         parameter: physical-processor
         match: ^(.*),
diff --git a/src/__tests__/unit/builtins/regex.test.ts b/src/__tests__/unit/builtins/regex.test.ts
new file mode 100644
index 000000000..19ccdf89d
--- /dev/null
+++ b/src/__tests__/unit/builtins/regex.test.ts
@@ -0,0 +1,160 @@
+import {Regex} from '../../../builtins/regex';
+
+import {ERRORS} from '../../../util/errors';
+
+const {InputValidationError, ConfigValidationError} = ERRORS;
+
+/**
+ * Unit tests for the builtin `Regex` plugin: initialization, the happy path
+ * of `execute()` and its config/input validation errors.
+ */
+describe('lib/regex: ', () => {
+  describe('Regex: ', () => {
+    const globalConfig = {
+      parameter: 'physical-processor',
+      match: '^[^,]+',
+      output: 'cpu/name',
+    };
+    const regex = Regex(globalConfig);
+
+    describe('init: ', () => {
+      it('successfully initialized.', () => {
+        expect(regex).toHaveProperty('metadata');
+        expect(regex).toHaveProperty('execute');
+      });
+    });
+
+    describe('execute(): ', () => {
+      it('successfully applies Regex strategy to given input.', async () => {
+        const physicalProcessor =
+          'Intel® Xeon® Platinum 8272CL,Intel® Xeon® 8171M 2.1 GHz,Intel® Xeon® E5-2673 v4 2.3 GHz,Intel® Xeon® E5-2673 v3 2.4 GHz';
+        expect.assertions(1);
+
+        const expectedResult = [
+          {
+            timestamp: '2021-01-01T00:00:00Z',
+            duration: 3600,
+            'physical-processor': physicalProcessor,
+            'cpu/name': 'Intel® Xeon® Platinum 8272CL',
+          },
+        ];
+
+        const result = await regex.execute([
+          {
+            timestamp: '2021-01-01T00:00:00Z',
+            duration: 3600,
+            'physical-processor': physicalProcessor,
+          },
+        ]);
+
+        expect(result).toStrictEqual(expectedResult);
+      });
+
+      it('returns a result when `match` lacks the leading `/` delimiter.', async () => {
+        const physicalProcessor =
+          'Intel® Xeon® Platinum 8272CL,Intel® Xeon® 8171M 2.1 GHz,Intel® Xeon® E5-2673 v4 2.3 GHz,Intel® Xeon® E5-2673 v3 2.4 GHz';
+        expect.assertions(1);
+
+        // Only the trailing delimiter is supplied here.
+        const globalConfig = {
+          parameter: 'physical-processor',
+          match: '[^,]+/',
+          output: 'cpu/name',
+        };
+        const regex = Regex(globalConfig);
+
+        const expectedResult = [
+          {
+            timestamp: '2021-01-01T00:00:00Z',
+            duration: 3600,
+            'physical-processor': physicalProcessor,
+            'cpu/name': 'Intel® Xeon® Platinum 8272CL',
+          },
+        ];
+
+        const result = await regex.execute([
+          {
+            timestamp: '2021-01-01T00:00:00Z',
+            duration: 3600,
+            'physical-processor': physicalProcessor,
+          },
+        ]);
+
+        expect(result).toStrictEqual(expectedResult);
+      });
+
+      it('throws an error when `parameter` does not match to `match`.', async () => {
+        const physicalProcessor =
+          'Intel® Xeon® Platinum 8272CL,Intel® Xeon® 8171M 2.1 GHz,Intel® Xeon® E5-2673 v4 2.3 GHz,Intel® Xeon® E5-2673 v3 2.4 GHz';
+        const expectedMessage = `Regex: \`${physicalProcessor}\` does not match the /^(^:)+/ regex expression.`;
+
+        // `^(^:)+` needs the value to start with `:`, which this input does not.
+        const globalConfig = {
+          parameter: 'physical-processor',
+          match: '^(^:)+',
+          output: 'cpu/name',
+        };
+        const regex = Regex(globalConfig);
+
+        // Guarantees the test fails if `execute()` does not throw at all.
+        expect.assertions(1);
+
+        try {
+          await regex.execute([
+            {
+              timestamp: '2021-01-01T00:00:00Z',
+              duration: 3600,
+              'physical-processor': physicalProcessor,
+            },
+          ]);
+        } catch (error) {
+          expect(error).toStrictEqual(
+            new InputValidationError(expectedMessage)
+          );
+        }
+      });
+
+      it('throws an error on missing global config.', async () => {
+        const expectedMessage = 'Regex: Configuration data is missing.';
+
+        const config = undefined;
+        const regex = Regex(config!);
+
+        expect.assertions(1);
+
+        try {
+          await regex.execute([
+            {
+              timestamp: '2021-01-01T00:00:00Z',
+              duration: 3600,
+            },
+          ]);
+        } catch (error) {
+          expect(error).toStrictEqual(
+            new ConfigValidationError(expectedMessage)
+          );
+        }
+      });
+
+      it('throws an error on missing params in input.', async () => {
+        const expectedMessage =
+          'Regex: `physical-processor` is missing from the input.';
+
+        expect.assertions(1);
+
+        try {
+          await regex.execute([
+            {
+              timestamp: '2021-01-01T00:00:00Z',
+              duration: 3600,
+            },
+          ]);
+        } catch (error) {
+          expect(error).toStrictEqual(
+            new InputValidationError(expectedMessage)
+          );
+        }
+      });
+    });
+  });
+});
diff --git a/src/builtins/index.ts b/src/builtins/index.ts
index 7af79c2e1..a4ffc389f 100644
--- a/src/builtins/index.ts
+++ b/src/builtins/index.ts
@@ -11,3 +11,4 @@ export {SciEmbodied} from './sci-embodied';
 export {Sci} from './sci';
 export {Exponent} from './exponent';
 export {Shell} from './shell';
+export {Regex} from './regex';
diff --git a/src/builtins/regex/README.md b/src/builtins/regex/README.md
new file mode 100644
index 000000000..52a99db9f
--- /dev/null
+++ b/src/builtins/regex/README.md
@@ -0,0 +1,91 @@
+# Regex
+
+`regex` is a generic plugin to match part of one string in an `input` and extract it into an output.
+
+You provide the name of the input value to match against and the name under which the matched text is added to the output array.
+
+For example, `boavizta-cpu` needs `cpu/name` to work; however, `cloud-metadata` returns `physical-processor`, which usually contains a long string of processors that the instance could be running on, separated by `,`, like so:
+
+```
+Intel® Xeon® Platinum 8272CL,Intel® Xeon® 8171M 2.1 GHz,Intel® Xeon® E5-2673 v4 2.3 GHz,Intel® Xeon® E5-2673 v3 2.4 GHz
+```
+
+## Parameters
+
+### Plugin config
+
+- `parameter` - the name of the input parameter whose value is matched
+- `match` - the regular expression that the value of `parameter` must match
+- `output` - the name of the parameter that the matched text is written to
+
+### Inputs
+
+- the parameter named by `parameter` must be available in every input of the input array
+
+## Returns
+
+- `output`: the first match of the `parameter` value against the `match` regex defined in global config.
+
+## Implementation
+
+To run the plugin, you must first create an instance of `Regex`. Then, you can call `execute()`.
+
+```typescript
+
+const globalConfig = {
+  parameter: 'physical-processor',
+  match: '^[^,]+',
+  output: 'cpu/name',
+};
+const regex = Regex(globalConfig);
+
+const input = [
+  {
+    timestamp: '2021-01-01T00:00:00Z',
+    duration: 3600,
+    'physical-processor':
+      'Intel® Xeon® Platinum 8272CL,Intel® Xeon® 8171M 2.1 GHz,Intel® Xeon® E5-2673 v4 2.3 GHz,Intel® Xeon® E5-2673 v3 2.4 GHz',
+  },
+];
+```
+
+## Example manifest
+
+IF users will typically call the plugin as part of a pipeline defined in a manifest file. In this case, instantiating the plugin is handled by `if` and does not have to be done explicitly by the user.
The following is an example manifest that calls `regex`:
+
+```yaml
+name: regex-demo
+description:
+tags:
+initialize:
+  outputs:
+    - yaml
+  plugins:
+    regex:
+      method: Regex
+      path: 'builtin'
+      global-config:
+        parameter: physical-processor
+        match: ^[^,]+
+        output: cpu/name
+tree:
+  children:
+    child:
+      pipeline:
+        - regex
+      config:
+        regex:
+      inputs:
+        - timestamp: 2023-08-06T00:00
+          duration: 3600
+          physical-processor: Intel® Xeon® Platinum 8272CL,Intel® Xeon® 8171M 2.1 GHz,Intel® Xeon® E5-2673 v4 2.3 GHz,Intel® Xeon® E5-2673 v3 2.4 GHz
+```
+
+You can run this example by saving it as `manifests/plugins/regex.yml` and executing the following command from the project root:
+
+```sh
+npm i -g @grnsft/if
+if --manifest manifests/plugins/regex.yml --output manifests/outputs/regex.yml
+```
+
+The results will be saved to a new `yaml` file in `manifests/outputs`.
diff --git a/src/builtins/regex/index.ts b/src/builtins/regex/index.ts
new file mode 100644
index 000000000..7dc854461
--- /dev/null
+++ b/src/builtins/regex/index.ts
@@ -0,0 +1,119 @@
+import {z} from 'zod';
+
+import {buildErrorMessage} from '../../util/helpers';
+import {ERRORS} from '../../util/errors';
+import {validate} from '../../util/validations';
+
+import {ExecutePlugin, PluginParams} from '../../types/interface';
+import {ConfigParams} from '../../types/common';
+
+const {InputValidationError, ConfigValidationError} = ERRORS;
+
+/**
+ * Builtin plugin that copies the first regex match found in one input
+ * parameter into another parameter.
+ *
+ * `globalConfig` must provide `parameter` (name of the input value to search),
+ * `match` (the pattern, optionally written as `/pattern/` or `/pattern/g`) and
+ * `output` (name of the parameter that receives the matched text).
+ */
+export const Regex = (globalConfig: ConfigParams): ExecutePlugin => {
+  const errorBuilder = buildErrorMessage(Regex.name);
+  const metadata = {
+    kind: 'execute',
+  };
+
+  /**
+   * Checks that the global config is present and has the expected shape.
+   * Throws `ConfigValidationError` when no config was provided.
+   */
+  const validateGlobalConfig = () => {
+    if (!globalConfig) {
+      throw new ConfigValidationError(
+        errorBuilder({message: 'Configuration data is missing'})
+      );
+    }
+    const schema = z.object({
+      parameter: z.string().min(1),
+      match: z.string().min(1),
+      output: z.string(),
+    });
+
+    return validate<z.infer<typeof schema>>(schema, globalConfig);
+  };
+
+  /**
+   * Checks that `parameter` is present (truthy) in the input.
+   * Throws `InputValidationError` otherwise; returns the input unchanged.
+   */
+  const validateSingleInput = (input: PluginParams, parameter: string) => {
+    if (!input[parameter]) {
+      throw new InputValidationError(
+        errorBuilder({
+          message: `\`${parameter}\` is missing from the input`,
+        })
+      );
+    }
+
+    return input;
+  };
+
+  /**
+   * Validates the config once, then adds the matched text to a copy of each
+   * input under the configured `output` name.
+   */
+  const execute = (inputs: PluginParams[]) => {
+    const {parameter, match, output} = validateGlobalConfig();
+
+    return inputs.map(input => {
+      const safeInput = validateSingleInput(input, parameter);
+
+      return {
+        ...input,
+        [output]: extractMatching(safeInput, parameter, match),
+      };
+    });
+  };
+
+  /**
+   * Extracts the first substring of `input[parameter]` that matches `match`.
+   * Throws `InputValidationError` when the match is missing or empty.
+   */
+  const extractMatching = (
+    input: PluginParams,
+    parameter: string,
+    match: string
+  ) => {
+    // Normalize to `/pattern/` or `/pattern/g`; the error message below
+    // reports the pattern in this form.
+    if (!match.startsWith('/')) {
+      match = '/' + match;
+    }
+
+    if (!match.endsWith('/g') && !match.endsWith('/')) {
+      match += '/';
+    }
+
+    // Build the expression with the RegExp constructor rather than `eval`, so
+    // the configured string is only ever treated as a pattern, never as code.
+    const flags = match.endsWith('/g') ? 'g' : '';
+    const pattern = match.slice(1, match.lastIndexOf('/'));
+    const regex = new RegExp(pattern, flags);
+    const matchedItem = input[parameter].match(regex);
+
+    if (!matchedItem || !matchedItem[0]) {
+      throw new InputValidationError(
+        errorBuilder({
+          message: `\`${input[parameter]}\` does not match the ${match} regex expression`,
+        })
+      );
+    }
+
+    return matchedItem[0];
+  };
+
+  return {
+    metadata,
+    execute,
+  };
+};
diff --git a/src/util/errors.ts b/src/util/errors.ts
index 61bf553a9..a65a603be 100644
--- a/src/util/errors.ts
+++ b/src/util/errors.ts
@@ -1,6 +1,7 @@
 const CUSTOM_ERRORS = [
   'CliInputError',
   'ConfigNotFoundError',
+  'ConfigValidationError',
   'ExhaustError',
   'FileNotFoundError',
   'MakeDirectoryError',