Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move regex plugin from if-plugins to builtins #748

Merged
merged 8 commits into from
May 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion manifests/plugins/regex/failure-missing-input-param.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ initialize:
plugins:
regex:
method: Regex
path: "@grnsft/if-plugins"
path: "builtin"
global-config:
parameter: physical-processor
match: ^(.*),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ initialize:
plugins:
regex:
method: Regex
path: "@grnsft/if-plugins"
path: "builtin"
global-config:
parameter: physical-processor
match: ^
Expand Down
4 changes: 2 additions & 2 deletions manifests/plugins/regex/success.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@ name: regex
description: successful path
tags:
initialize:
outputs: ['yaml']
# outputs: ['yaml']
plugins:
regex:
method: Regex
path: "@grnsft/if-plugins"
path: "builtin"
global-config:
parameter: physical-processor
match: ^(.*),
Expand Down
153 changes: 153 additions & 0 deletions src/__tests__/unit/builtins/regex.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
import {Regex} from '../../../builtins/regex';

import {ERRORS} from '../../../util/errors';

const {InputValidationError, ConfigValidationError} = ERRORS;

/**
 * Unit tests for the builtin `Regex` plugin: initialization, successful
 * extraction, and the error paths for a non-matching pattern, a missing
 * global config and a missing input parameter.
 */
describe('lib/regex: ', () => {
  describe('Regex: ', () => {
    // Comma-separated processor list shared by every test that needs one.
    const physicalProcessor =
      'Intel® Xeon® Platinum 8272CL,Intel® Xeon® 8171M 2.1 GHz,Intel® Xeon® E5-2673 v4 2.3 GHz,Intel® Xeon® E5-2673 v3 2.4 GHz';

    // Fresh input arrays are built per call so tests never share objects.
    const inputsWithoutProcessor = () => [
      {
        timestamp: '2021-01-01T00:00:00Z',
        duration: 3600,
      },
    ];
    const inputsWithProcessor = () => [
      {
        timestamp: '2021-01-01T00:00:00Z',
        duration: 3600,
        'physical-processor': physicalProcessor,
      },
    ];
    // Expected output when the first processor name is extracted.
    const outputsWithCpuName = () => [
      {
        timestamp: '2021-01-01T00:00:00Z',
        duration: 3600,
        'physical-processor': physicalProcessor,
        'cpu/name': 'Intel® Xeon® Platinum 8272CL',
      },
    ];

    const defaultConfig = {
      parameter: 'physical-processor',
      match: '^[^,]+',
      output: 'cpu/name',
    };
    const regex = Regex(defaultConfig);

    describe('init: ', () => {
      it('successfully initalized.', () => {
        expect(regex).toHaveProperty('metadata');
        expect(regex).toHaveProperty('execute');
      });
    });

    describe('execute(): ', () => {
      it('successfully applies Regex strategy to given input.', async () => {
        expect.assertions(1);

        const result = await regex.execute(inputsWithProcessor());

        expect(result).toStrictEqual(outputsWithCpuName());
      });

      it('returns a result when regex is not started and ended with ``.', async () => {
        expect.assertions(1);

        // Pattern has a trailing delimiter only; the plugin adds the leading one.
        const halfDelimitedRegex = Regex({
          parameter: 'physical-processor',
          match: '[^,]+/',
          output: 'cpu/name',
        });

        const result = await halfDelimitedRegex.execute(inputsWithProcessor());

        expect(result).toStrictEqual(outputsWithCpuName());
      });

      it('throws an error when `parameter` does not match to `match`.', async () => {
        const expectedMessage = `Regex: \`${physicalProcessor}\` does not match the /^(^:)+/ regex expression.`;

        const nonMatchingRegex = Regex({
          parameter: 'physical-processor',
          match: '^(^:)+',
          output: 'cpu/name',
        });

        expect.assertions(1);

        try {
          await nonMatchingRegex.execute(inputsWithProcessor());
        } catch (error) {
          expect(error).toStrictEqual(
            new InputValidationError(expectedMessage)
          );
        }
      });

      it('throws an error on missing global config.', async () => {
        const expectedMessage = 'Regex: Configuration data is missing.';

        const missingConfig = undefined;
        const unconfiguredRegex = Regex(missingConfig!);

        expect.assertions(1);

        try {
          await unconfiguredRegex.execute(inputsWithoutProcessor());
        } catch (error) {
          expect(error).toStrictEqual(
            new ConfigValidationError(expectedMessage)
          );
        }
      });

      it('throws an error on missing params in input.', async () => {
        const expectedMessage =
          'Regex: `physical-processor` is missing from the input.';

        expect.assertions(1);

        try {
          await regex.execute(inputsWithoutProcessor());
        } catch (error) {
          expect(error).toStrictEqual(
            new InputValidationError(expectedMessage)
          );
        }
      });
    });
  });
});
1 change: 1 addition & 0 deletions src/builtins/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,4 @@ export {SciEmbodied} from './sci-embodied';
export {Sci} from './sci';
export {Exponent} from './exponent';
export {Shell} from './shell';
export {Regex} from './regex';
91 changes: 91 additions & 0 deletions src/builtins/regex/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# Regex

`regex` is a generic plugin to match part of one string in an `input` and extract it into an output.

You provide the name of the input value you want to match against, and the name under which the matched text is added to the output array.

For example, `boavizta-cpu` needs `cpu/name` to work; however, `cloud-metadata` returns `physical-processor`, which usually contains a long, comma-separated string of the processors the instance could be running on, like so:

```
Intel® Xeon® Platinum 8272CL,Intel® Xeon® 8171M 2.1 GHz,Intel® Xeon® E5-2673 v4 2.3 GHz,Intel® Xeon® E5-2673 v3 2.4 GHz
```

## Parameters

### Plugin config

- `parameter` - the name of the input parameter whose value the regex is applied to
- `match` - the regular expression to match against the value of `parameter`
- `output` - the name of the parameter under which the matched text is added to the output

### Inputs

- `parameter` - the input parameter named by `parameter` in the plugin config; it must be present in every element of the input array

## Returns

- `output`: the first match of the `match` regex in the value of `parameter`, stored under the name given by `output` in the global config.

## Implementation

To run the plugin, you must first create an instance of `Regex`. Then, you can call `execute()`.

```typescript

const globalConfig = {
parameter: 'physical-processor',
match: '^[^,]+',
output: 'cpu/name',
};
const regex = Regex(globalConfig);

const input = [
{
timestamp: '2021-01-01T00:00:00Z',
duration: 3600,
'physical-processor':
'Intel® Xeon® Platinum 8272CL,Intel® Xeon® 8171M 2.1 GHz,Intel® Xeon® E5-2673 v4 2.3 GHz,Intel® Xeon® E5-2673 v3 2.4 GHz',
},
];
```

## Example manifest

IF users will typically call the plugin as part of a pipeline defined in a manifest file. In this case, instantiating the plugin is handled by `if` and does not have to be done explicitly by the user. The following is an example manifest that calls `regex`:

```yaml
name: regex-demo
description:
tags:
initialize:
outputs:
- yaml
plugins:
regex:
method: Regex
path: 'builtin'
global-config:
parameter: physical-processor
match: ^[^,]+
output: cpu/name
tree:
children:
child:
pipeline:
- regex
config:
regex:
inputs:
- timestamp: 2023-08-06T00:00
duration: 3600
physical-processor: Intel® Xeon® Platinum 8272CL,Intel® Xeon® 8171M 2.1 GHz,Intel® Xeon® E5-2673 v4 2.3 GHz,Intel® Xeon® E5-2673 v3 2.4 GHz
```

You can run this example by saving it as `manifests/examples/regex.yml` and executing the following command from the project root:

```sh
npm i -g @grnsft/if
if --manifest manifests/examples/regex.yml --output manifests/outputs/regex.yml
```

The results will be saved to a new `yaml` file in `manifests/outputs`.
106 changes: 106 additions & 0 deletions src/builtins/regex/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import {z} from 'zod';

import {buildErrorMessage} from '../../util/helpers';
import {ERRORS} from '../../util/errors';
import {validate} from '../../util/validations';

import {ExecutePlugin, PluginParams} from '../../types/interface';
import {ConfigParams} from '../../types/common';

const {InputValidationError, ConfigValidationError} = ERRORS;

/**
 * Creates the `Regex` plugin, which copies the part of an input parameter
 * that matches a configured regular expression into a new output parameter.
 *
 * @param globalConfig Plugin configuration; must provide `parameter` (name of
 *   the input field to read), `match` (the regex, with or without surrounding
 *   `/` delimiters and an optional trailing `g` flag) and `output` (name of
 *   the field the match is written to).
 * @returns An object exposing `metadata` and `execute`.
 */
export const Regex = (globalConfig: ConfigParams): ExecutePlugin => {
  const errorBuilder = buildErrorMessage(Regex.name);
  const metadata = {
    kind: 'execute',
  };

  /**
   * Checks global config value are valid.
   *
   * @throws ConfigValidationError when no global config was supplied.
   * @returns The config as returned by `validate` for the schema below
   *   (presumably `validate` throws on a schema mismatch — confirm in
   *   `util/validations`).
   */
  const validateGlobalConfig = () => {
    if (!globalConfig) {
      throw new ConfigValidationError(
        errorBuilder({message: 'Configuration data is missing'})
      );
    }
    const schema = z.object({
      parameter: z.string().min(1),
      match: z.string().min(1),
      output: z.string(),
    });

    return validate<z.infer<typeof schema>>(schema, globalConfig);
  };

  /**
   * Checks for required fields in input.
   *
   * @throws InputValidationError when `input[parameter]` is falsy.
   * @returns The same `input` object, unchanged.
   */
  const validateSingleInput = (input: PluginParams, parameter: string) => {
    if (!input[parameter]) {
      throw new InputValidationError(
        errorBuilder({
          message: `\`${parameter}\` is missing from the input`,
        })
      );
    }

    return input;
  };

  /**
   * Executes the regex of the given parameter.
   *
   * Validates the global config once, then for every input validates that the
   * configured parameter is present and returns a copy of the input extended
   * with `[output]` set to the matched text.
   */
  const execute = (inputs: PluginParams[]) => {
    const safeGlobalConfig = validateGlobalConfig();
    const {parameter, match, output} = safeGlobalConfig;

    return inputs.map(input => {
      const safeInput = Object.assign(
        {},
        input,
        validateSingleInput(input, parameter)
      );

      return {
        ...input,
        [output]: extractMatching(safeInput, parameter, match),
      };
    });
  };

  /**
   * Extracts a substring from the given input parameter that matches the provided regular expression pattern.
   *
   * The pattern is normalized to the `/pattern/` or `/pattern/g` form first
   * (the normalized form is also what the error message reports), and the
   * regex is then built with the `RegExp` constructor.
   *
   * @throws InputValidationError when nothing (or only an empty string) matches.
   * @returns The first matched substring.
   */
  const extractMatching = (
    input: PluginParams,
    parameter: string,
    match: string
  ) => {
    if (!match.startsWith('/')) {
      match = '/' + match;
    }

    if (!match.endsWith('/g') && !match.endsWith('/')) {
      match += '/';
    }

    // Build the regex with the RegExp constructor instead of `eval`: the
    // pattern comes from a user-supplied manifest, so evaluating it as code
    // would allow arbitrary code execution and would also break on patterns
    // that contain a `/`.
    const isGlobal = match.endsWith('/g');
    const pattern = match.slice(1, isGlobal ? -2 : -1);
    const regex = new RegExp(pattern, isGlobal ? 'g' : '');
    const matchedItem = input[parameter].match(regex);

    if (!matchedItem || !matchedItem[0]) {
      throw new InputValidationError(
        errorBuilder({
          message: `\`${input[parameter]}\` does not match the ${match} regex expression`,
        })
      );
    }

    return matchedItem[0];
  };

  return {
    metadata,
    execute,
  };
};
1 change: 1 addition & 0 deletions src/util/errors.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
const CUSTOM_ERRORS = [
'CliInputError',
'ConfigNotFoundError',
'ConfigValidationError',
'ExhaustError',
'FileNotFoundError',
'MakeDirectoryError',
Expand Down