Skip to content

Commit

Permalink
AG-37767 Remove XRegExp from the direct dependencies of AGTree
Browse files Browse the repository at this point in the history
Merge in ADGUARD-FILTERS/tsurlfilter from fix/AG-37767-1 to master

Squashed commit of the following:

commit 75ebda1
Author: scripthunter7 <[email protected]>
Date:   Wed Nov 27 14:24:09 2024 +0100

    add link

commit 37d81f1
Author: scripthunter7 <[email protected]>
Date:   Wed Nov 27 14:22:50 2024 +0100

    mention issue

commit 7dddd1f
Author: scripthunter7 <[email protected]>
Date:   Wed Nov 27 14:21:31 2024 +0100

    fix nit

commit 918c0e0
Author: scripthunter7 <[email protected]>
Date:   Wed Nov 27 11:30:58 2024 +0100

    mention agtree in tsurlfilter changelog

commit f501262
Author: scripthunter7 <[email protected]>
Date:   Wed Nov 27 10:50:52 2024 +0100

    fix comment

commit 561b5e3
Author: scripthunter7 <[email protected]>
Date:   Wed Nov 27 10:50:42 2024 +0100

    improve changelog

commit a8ce1fe
Author: scripthunter7 <[email protected]>
Date:   Tue Nov 26 21:50:27 2024 +0100

    rework xregexp
  • Loading branch information
scripthunter7 committed Nov 27, 2024
1 parent f3ba0e9 commit 014a3df
Show file tree
Hide file tree
Showing 9 changed files with 142 additions and 21 deletions.
14 changes: 14 additions & 0 deletions packages/agtree/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,20 @@ The format is based on [Keep a Changelog][keepachangelog], and this project adhe
[keepachangelog]: https://keepachangelog.com/en/1.0.0/
[semver]: https://semver.org/spec/v2.0.0.html

## [2.2.0] - 2024-11-27

### Removed

- `xregexp` library as a runtime dependency. It remains a development dependency for processing YAML files,
enabling enhanced readability and maintainability of regex patterns through free-spacing mode and inline comments.
However, xregexp is no longer bundled with the library to significantly reduce memory usage and bundle size,
improving overall performance.
The library now uses native ECMAScript regex patterns at runtime, ensuring compatibility
without the additional overhead of xregexp. Related to [AdguardBrowserExtension#3037].

[2.2.0]: https://github.com/AdguardTeam/tsurlfilter/releases/tag/agtree-v2.2.0
[AdguardBrowserExtension#3037]: https://github.com/AdguardTeam/AdguardBrowserExtension/issues/3037

## [2.1.4] - 2024-11-25

### Added
Expand Down
6 changes: 3 additions & 3 deletions packages/agtree/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@adguard/agtree",
"version": "2.1.4",
"version": "2.2.0",
"description": "Tool set for working with adblock filter lists",
"keywords": [
"adblock",
Expand Down Expand Up @@ -114,7 +114,8 @@
"ts-node": "^10.9.2",
"tsx": "^4.7.3",
"type-fest": "^4.18.0",
"typescript": "^5.0.4"
"typescript": "^5.0.4",
"xregexp": "^5.1.1"
},
"dependencies": {
"@adguard/css-tokenizer": "workspace:^",
Expand All @@ -124,7 +125,6 @@
"semver": "^7.5.3",
"sprintf-js": "^1.1.3",
"tldts": "^5.7.112",
"xregexp": "^5.1.1",
"zod": "3.21.4"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,15 @@
* @file Provides compatibility table data loading.
*/

import zod from 'zod';
import path from 'path';
// eslint-disable-next-line import/no-extraneous-dependencies
import yaml from 'js-yaml';
import { readFileSync, readdirSync } from 'fs';
// Note: XRegExp is only a dev dependency, but this compatibility table data loader is not included
// in the production build, so it is safe to suppress the ESLint warning here.
// eslint-disable-next-line import/no-extraneous-dependencies
import XRegExp from 'xregexp';

import { type CompatibilityTable, type CompatibilityTableRow } from './types';
import {
Expand All @@ -18,8 +23,10 @@ import {
type ModifierDataSchema,
type RedirectDataSchema,
type ScriptletDataSchema,
KNOWN_VALIDATORS,
} from './schemas';
import { deepFreeze } from '../utils/deep-freeze';
import { EMPTY } from '../utils/constants';

/**
* Gets all `.yml` files from a directory.
Expand Down Expand Up @@ -139,7 +146,64 @@ const getCompatibilityTableData = <T extends BaseCompatibilityDataSchema>(
* @returns Compatibility table data for modifiers.
*/
const getModifiersCompatibilityTableData = (dir: string) => {
    // Build-time preprocessor for modifier data: `value_format` may be written with XRegExp syntax
    // in the YAML files, so it is compiled here and replaced with the equivalent native RegExp source.
    // Any flags extracted from the pattern are merged into `value_format_flags`.
    const valueFormatPreprocessorSchema = zod.object({
        value_format: zod.optional(zod.string()),
        value_format_flags: zod.optional(zod.string()),
    }).passthrough().transform((data) => {
        const {
            value_format: valueFormat,
            value_format_flags: valueFormatFlags,
        } = data;

        // Nothing to convert if the value format is missing
        if (!valueFormat) {
            return data;
        }

        const valueFormatTrimmed = valueFormat.trim();

        // Blank values and known validator names (e.g. `domain`) are not regular expressions,
        // so they are passed through untouched and left for the modifier schema to check.
        // Compiling a blank value would silently produce the match-everything pattern `(?:)`.
        if (!valueFormatTrimmed || KNOWN_VALIDATORS.has(valueFormatTrimmed)) {
            return data;
        }

        // Create an XRegExp pattern from the value format, then convert it to a native RegExp pattern
        const xRegExpPattern = XRegExp(valueFormatTrimmed);
        const regExpPattern = new RegExp(xRegExpPattern.source, xRegExpPattern.flags);

        // If any flags are present in the pattern, we need to combine them with the existing flags

        // Note: we need 'value_format_flags' because RegExp constructor doesn't support flags in the pattern,
        // they should be passed as a separate argument, and perhaps this is the most convenient way to do it

        // Note: do not use 'regExpPattern.toString()' because it will include the slashes and flags and
        // you cannot create the equivalent RegExp object from it again
        if (regExpPattern.flags) {
            // Union of the flags from 'value_format_flags' and the flags of the compiled pattern;
            // the Set removes duplicates while keeping the first-seen order
            const flags = new Set<string>([
                ...(valueFormatFlags ?? EMPTY).split(EMPTY),
                ...regExpPattern.flags.split(EMPTY),
            ]);

            // eslint-disable-next-line no-param-reassign
            data.value_format_flags = Array.from(flags).join(EMPTY);
        }

        // eslint-disable-next-line no-param-reassign
        data.value_format = regExpPattern.source;

        return data;
    });

    // Run the preprocessor first, then validate its output against the regular modifier schema
    const combinedSchema = valueFormatPreprocessorSchema.pipe(modifierDataSchema);

    return getCompatibilityTableData<ModifierDataSchema>(dir, baseFileSchema(combinedSchema));
};

/**
Expand Down
10 changes: 10 additions & 0 deletions packages/agtree/src/compatibility-tables/modifiers/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,12 @@ fields:
| `exception_only` | The actual modifier can only be used in exceptions; it cannot be used in blocking rules. If its value is `true`, then the modifier can be used only in exceptions. `exception_only` and `block_only` cannot be used together (they are mutually exclusive). | `boolean` | `false` |
| `value_optional` | Describes whether the *assignable* modifier value is required. For example, `$cookie` is assignable but it can be used without a value in exception rules: `@@\|\|example.com^$cookie`. If `false`, the `value_format` is required, e.g. the value of `$app` should always be specified. | `boolean` | `false` |
| `value_format` | Describes the format of the value. See [Value format](#value-format) for more details. | `string\|null` | `null` |
| `value_format_flags` | [RegExp flags][regexp-flags] for the `value_format` field. It can be used only if `value_format` is provided and is a regular expression. Thanks to XRegExp syntax, you can also specify flags at the beginning of the regular expression, e.g. `(?i)pattern`. | `string\|null` | `null` |

<!-- markdownlint-enable MD013 -->

[regexp-flags]: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/RegExp/RegExp#flags

### Value format

The value format describes the format of the modifier value. It can be one of the following:
Expand Down Expand Up @@ -78,6 +81,13 @@ The value format describes the format of the modifier value. It can be one of th
- Example:
- For validating `domain` modifier, you can use `value_format: pipe_separated_domains`.

> [!NOTE]
> In YAML files, you can use XRegExp syntax for regular expressions.
> For example, you can use free-spacing mode `(?x)` to make the regular expression more readable,
> or you can use comments `(?#...)` to add comments to the regular expression,
> or named captures `(?<name>...)`.
> During build, YAML files are processed and we extract native regular expressions from XRegExp instances.
* * *

### <a name="footnote-asterisk-1"></a>
Expand Down
28 changes: 25 additions & 3 deletions packages/agtree/src/compatibility-tables/schemas/modifier.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
*/

import zod from 'zod';
import XRegExp from 'xregexp';

import { zodToCamelCase } from '../utils/zod-camelcase';
import {
Expand All @@ -13,13 +12,20 @@ import {
nonEmptyStringSchema,
} from './base';
import { getErrorMessage } from '../../utils/error';
import { EMPTY } from '../../utils/constants';

/**
 * Known validators that don't need to be validated as regex.
 *
 * A `value_format` equal to one of these names is treated as a validator name
 * and is not compiled as a regular expression.
 */
export const KNOWN_VALIDATORS: ReadonlySet<string> = new Set([
    'csp_value',
    'domain',
    'permissions_value',
    'pipe_separated_apps',
    'pipe_separated_denyallow_domains',
    'pipe_separated_domains',
    'pipe_separated_methods',
    'pipe_separated_stealth_options',
    'regexp',
    'url',
]);
Expand Down Expand Up @@ -78,6 +84,11 @@ export const modifierDataSchema = zodToCamelCase(baseCompatibilityDataSchema.ext
* Its value can be a regex pattern or a known validator name (e.g. `domain`, `pipe_separated_domains`, etc.).
*/
value_format: nonEmptyStringSchema.nullable().default(null),

/**
* Describes the flags for the `value_format` regex pattern.
*/
value_format_flags: nonEmptyStringSchema.nullable().default(null),
}).superRefine((data, ctx) => {
// TODO: find something better, for now we can't add refine logic to the base schema:
// https://github.com/colinhacks/zod/issues/454#issuecomment-848370721
Expand All @@ -102,18 +113,29 @@ export const modifierDataSchema = zodToCamelCase(baseCompatibilityDataSchema.ext

// if it is a known validator, we don't need to validate it further
if (KNOWN_VALIDATORS.has(valueFormat)) {
if (data.value_format_flags) {
ctx.addIssue({
code: zod.ZodIssueCode.custom,
message: 'value_format_flags are not allowed for known validators',
});
}
return;
}

// otherwise, we need to validate it as a regex
try {
XRegExp(valueFormat);
new RegExp(valueFormat, data.value_format_flags ?? EMPTY);
} catch (error: unknown) {
ctx.addIssue({
code: zod.ZodIssueCode.custom,
message: getErrorMessage(error),
});
}
} else if (data.value_format_flags) {
ctx.addIssue({
code: zod.ZodIssueCode.custom,
message: 'value_format is required for value_format_flags',
});
}
}));

Expand Down
2 changes: 1 addition & 1 deletion packages/agtree/src/validator/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,7 @@ const validateForSpecificSyntax = (
throw new Error(`${SOURCE_DATA_ERROR_PREFIX.NO_VALUE_FORMAT_FOR_ASSIGNABLE}: '${modifierName}'`);
}

return validateValue(modifier, specificBlockerData.valueFormat);
return validateValue(modifier, specificBlockerData.valueFormat, specificBlockerData.valueFormatFlags);
}

if (modifier?.value) {
Expand Down
20 changes: 14 additions & 6 deletions packages/agtree/src/validator/value.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
import XRegExp from 'xregexp';

import {
type Modifier,
type AppList,
Expand Down Expand Up @@ -41,6 +39,7 @@ import {
VALIDATION_ERROR_PREFIX,
} from './constants';
import { defaultParserOptions } from '../parser/options';
import { isString } from '../utils/type-guards';

/**
* Represents the possible list parsers.
Expand Down Expand Up @@ -699,10 +698,15 @@ const isCustomValueFormatValidator = (valueFormat: string): valueFormat is Custo
*
* @param modifier Modifier AST node.
* @param valueFormat Value format for the modifier.
* @param valueFormatFlags Optional; RegExp flags for the value format.
*
* @returns Validation result.
*/
export const validateValue = (modifier: Modifier, valueFormat: string): ValidationResult => {
export const validateValue = (
modifier: Modifier,
valueFormat: string,
valueFormatFlags?: string | null,
): ValidationResult => {
if (isCustomValueFormatValidator(valueFormat)) {
const validator = CUSTOM_VALUE_FORMAT_MAP[valueFormat];
return validator(modifier);
Expand All @@ -714,14 +718,18 @@ export const validateValue = (modifier: Modifier, valueFormat: string): Validati
return getValueRequiredValidationResult(modifierName);
}

let xRegExp;
let regExp: RegExp;
try {
xRegExp = XRegExp(valueFormat);
if (isString(valueFormatFlags)) {
regExp = new RegExp(valueFormat, valueFormatFlags);
} else {
regExp = new RegExp(valueFormat);
}
} catch (e) {
throw new Error(`${SOURCE_DATA_ERROR_PREFIX.INVALID_VALUE_FORMAT_REGEXP}: '${modifierName}'`);
}

const isValid = xRegExp.test(modifier.value?.value);
const isValid = regExp.test(modifier.value?.value);
if (!isValid) {
return getInvalidValidationResult(`${VALIDATION_ERROR_PREFIX.VALUE_INVALID}: '${modifierName}'`);
}
Expand Down
6 changes: 6 additions & 0 deletions packages/tsurlfilter/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
<!-- TODO: manually add compare links for version changes -->
<!-- e.g. [1.0.77]: https://github.com/AdguardTeam/tsurlfilter/compare/tsurlfilter-v1.0.76...tsurlfilter-v1.0.77 -->

## Unreleased

### Changed

- Updated `@adguard/agtree` to `v2.2.0` which improves the bundle size and performance.

## [3.0.8] - 2024-11-25

### Changed
Expand Down
11 changes: 4 additions & 7 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 014a3df

Please sign in to comment.