From d68862deb037371e519b54ba70b49be0bc6950c2 Mon Sep 17 00:00:00 2001 From: Dev Gupta Date: Mon, 28 Nov 2022 20:35:35 -0500 Subject: [PATCH 1/5] Fixes cucumber/vscode#125 Regression on checking for the correct symbols in a regex. --- src/language/pythonLanguage.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/language/pythonLanguage.ts b/src/language/pythonLanguage.ts index e8d86db5..e42bfb58 100644 --- a/src/language/pythonLanguage.ts +++ b/src/language/pythonLanguage.ts @@ -105,6 +105,8 @@ function stringLiteral(node: TreeSitterSyntaxNode) { function isRegex(cleanWord: string) { const startsWithSlash = cleanWord.startsWith('/') const namedGroupMatch = /\?P/ + const specialCharsMatch = /\(|\)|\.|\*|\\|\|/ const containsNamedGroups = namedGroupMatch.test(cleanWord) - return startsWithSlash || containsNamedGroups + const containsSpecialChars = specialCharsMatch.test(cleanWord) + return startsWithSlash || containsNamedGroups || containsSpecialChars } From 7f9daf854f9170bb68f2f6c2271fe0dd24b4d19d Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Mon, 28 Nov 2022 22:49:07 +0000 Subject: [PATCH 2/5] Update dependency @types/mocha to v10.0.1 --- package-lock.json | 14 +++++++------- package.json | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/package-lock.json b/package-lock.json index 11244ff3..a5aafae4 100644 --- a/package-lock.json +++ b/package-lock.json @@ -25,7 +25,7 @@ "@cucumber/cucumber": "^8.9.0", "@cucumber/message-streams": "^4.0.1", "@types/glob": "8.0.0", - "@types/mocha": "10.0.0", + "@types/mocha": "10.0.1", "@types/node": "18.11.9", "@types/react": "18.0.25", "@typescript-eslint/eslint-plugin": "5.44.0", @@ -750,9 +750,9 @@ "dev": true }, "node_modules/@types/mocha": { - "version": "10.0.0", - "resolved": "https://registry.npmjs.org/@types/mocha/-/mocha-10.0.0.tgz", - "integrity": "sha512-rADY+HtTOA52l9VZWtgQfn4p+UDVM2eDVkMZT1I6syp0YKxW2F9v+0pbRZLsvskhQv/vMb6ZfCay81GHbz5SHg==", + "version": "10.0.1", + "resolved": "https://registry.npmjs.org/@types/mocha/-/mocha-10.0.1.tgz", + "integrity": "sha512-/fvYntiO1GeICvqbQ3doGDIP97vWmvFt83GKguJ6prmQM2iXZfFcq6YE8KteFyRtX2/h5Hf91BYvPodJKFYv5Q==", "dev": true }, "node_modules/@types/mustache": { @@ -8450,9 +8450,9 @@ "dev": true }, "@types/mocha": { - "version": "10.0.0", - "resolved": "https://registry.npmjs.org/@types/mocha/-/mocha-10.0.0.tgz", - "integrity": "sha512-rADY+HtTOA52l9VZWtgQfn4p+UDVM2eDVkMZT1I6syp0YKxW2F9v+0pbRZLsvskhQv/vMb6ZfCay81GHbz5SHg==", + "version": "10.0.1", + "resolved": "https://registry.npmjs.org/@types/mocha/-/mocha-10.0.1.tgz", + "integrity": "sha512-/fvYntiO1GeICvqbQ3doGDIP97vWmvFt83GKguJ6prmQM2iXZfFcq6YE8KteFyRtX2/h5Hf91BYvPodJKFYv5Q==", "dev": true }, "@types/mustache": { diff --git a/package.json b/package.json index 3e641186..12d62e07 100644 --- a/package.json +++ b/package.json @@ -84,7 +84,7 @@ "@cucumber/cucumber": "^8.9.0", "@cucumber/message-streams": "^4.0.1", "@types/glob": "8.0.0", - "@types/mocha": "10.0.0", + "@types/mocha": "10.0.1", "@types/node": "18.11.9", "@types/react": "18.0.25", "@typescript-eslint/eslint-plugin": "5.44.0", From 93fe5b365f5f6315e1b9b1eb553bd256861fb8e1 Mon Sep 17 00:00:00 2001 From: Dev Gupta Date: Mon, 28 Nov 2022 20:35:35 -0500 Subject: [PATCH 3/5] Fixes cucumber/vscode#125 Regression on checking for the correct symbols in a regex. --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 739ab589..84388a36 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). ## [Unreleased] +- Fixes a regression in the python language implementation for regexes ## [1.3.0] - 2022-11-28 ### Added From e388dfe6935b1f4ac8d18ebe930be5f74d95e58d Mon Sep 17 00:00:00 2001 From: Dev Gupta Date: Mon, 5 Dec 2022 20:52:59 -0500 Subject: [PATCH 4/5] Added a test for a regex regression --- src/language/pythonLanguage.ts | 12 +++++++----- test/language/pythonLanguage.test.ts | 19 +++++++++++++++++++ 2 files changed, 26 insertions(+), 5 deletions(-) create mode 100644 test/language/pythonLanguage.test.ts diff --git a/src/language/pythonLanguage.ts b/src/language/pythonLanguage.ts index e42bfb58..f20bc4c3 100644 --- a/src/language/pythonLanguage.ts +++ b/src/language/pythonLanguage.ts @@ -29,9 +29,7 @@ export const pythonLanguage: Language = { // a wider array of regex features than javascript // a singular way of communicating regex consistent // across languages is necessary - return isRegex(node.text.slice(1, -1)) - ? RegExp(cleanRegex(node.text.slice(1, -1).split('?P').join(''))) - : node.text.slice(1, -1) + return toRegexStep(node.text) }, defineParameterTypeQueries: [ `(call @@ -95,14 +93,18 @@ function cleanRegex(regexString: string) { return regexString } } - +export function toRegexStep(step: string) { + return isRegex(step.slice(1, -1)) + ? RegExp(cleanRegex(step.slice(1, -1).split('?P').join(''))) + : step.slice(1, -1) +} function stringLiteral(node: TreeSitterSyntaxNode) { const isFString = node.text.startsWith('f') const cleanWord = isFString ? node.text.slice(1).slice(1, -1) : node.text.slice(1, -1) return cleanWord } -function isRegex(cleanWord: string) { +export function isRegex(cleanWord: string) { const startsWithSlash = cleanWord.startsWith('/') const namedGroupMatch = /\?P/ const specialCharsMatch = /\(|\)|\.|\*|\\|\|/ diff --git a/test/language/pythonLanguage.test.ts b/test/language/pythonLanguage.test.ts new file mode 100644 index 00000000..a031111d --- /dev/null +++ b/test/language/pythonLanguage.test.ts @@ -0,0 +1,19 @@ +import assert from 'assert' + +import { toRegexStep } from '../../src/language/pythonLanguage.js' + +describe('pythonLanguage', () => { + it('should identify and return regexes correctly', () => { + //NOTE these are strings that would look like from tree-sitter + const regexes = ['"Something (.*)"', '"Catch them digits \\d+"'] + regexes.forEach(function (regex) { + assert(toRegexStep(regex) instanceof RegExp) + }) + }) + it('should identify normal strings and just return a string', () => { + const nonregexes = ['"test"'] + nonregexes.forEach(function (nonregex) { + assert(toRegexStep(nonregex) == 'test') + }) + }) +}) From 4bbee1824717fb697d32ad7d863ba3a010016552 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Aslak=20Helles=C3=B8y?= Date: Thu, 8 Dec 2022 11:21:31 +0000 Subject: [PATCH 5/5] Formatting/renames/types --- src/language/pythonLanguage.ts | 40 ++++++++++++++-------------- test/language/pythonLanguage.test.ts | 8 +++--- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/src/language/pythonLanguage.ts b/src/language/pythonLanguage.ts index f20bc4c3..7bb45398 100644 --- a/src/language/pythonLanguage.ts +++ b/src/language/pythonLanguage.ts @@ -8,7 +8,7 @@ export const pythonLanguage: Language = { case 'string': { return stringLiteral(node) } - case 'concatednated_string': { + case 'concatenated_string': { return stringLiteral(node) } case 'identifier': { @@ -20,16 +20,15 @@ export const pythonLanguage: Language = { } }, toParameterTypeRegExps(node: TreeSitterSyntaxNode) { - return RegExp(cleanRegex(stringLiteral(node))) + return RegExp(cleanRegExp(stringLiteral(node))) }, toStepDefinitionExpression(node: TreeSitterSyntaxNode): StringOrRegExp { - // this removes the head and tail apostrophes - // remove python named capture groups. + // This removes the head and tail apostrophes. // TODO: This should be temporary. Python supports // a wider array of regex features than javascript // a singular way of communicating regex consistent // across languages is necessary - return toRegexStep(node.text) + return toStringOrRegExp(node.text) }, defineParameterTypeQueries: [ `(call @@ -54,13 +53,13 @@ export const pythonLanguage: Language = { defineStepDefinitionQueries: [ ` (decorated_definition - (decorator - (call - function: (identifier) @method - arguments: (argument_list (string) @expression) - ) + (decorator + (call + function: (identifier) @method + arguments: (argument_list (string) @expression) ) - (#match? @method "(given|when|then)") + ) + (#match? @method "(given|when|then)") ) @root `, ], @@ -84,27 +83,28 @@ export const pythonLanguage: Language = { # Please convert to use regular expressions, as Behave does not currently support Cucumber Expressions`, } -function cleanRegex(regexString: string) { - const startsWith = regexString[0] +function cleanRegExp(regExpString: string): string { + const startsWith = regExpString[0] switch (startsWith) { case '/': - return regexString.slice(1, -1) + return regExpString.slice(1, -1) default: - return regexString + return regExpString } } -export function toRegexStep(step: string) { - return isRegex(step.slice(1, -1)) - ? RegExp(cleanRegex(step.slice(1, -1).split('?P').join(''))) +export function toStringOrRegExp(step: string): StringOrRegExp { + return isRegExp(step.slice(1, -1)) + ? RegExp(cleanRegExp(step.slice(1, -1).split('?P').join(''))) : step.slice(1, -1) } -function stringLiteral(node: TreeSitterSyntaxNode) { + +function stringLiteral(node: TreeSitterSyntaxNode): string { const isFString = node.text.startsWith('f') const cleanWord = isFString ? node.text.slice(1).slice(1, -1) : node.text.slice(1, -1) return cleanWord } -export function isRegex(cleanWord: string) { +export function isRegExp(cleanWord: string): boolean { const startsWithSlash = cleanWord.startsWith('/') const namedGroupMatch = /\?P/ const specialCharsMatch = /\(|\)|\.|\*|\\|\|/ diff --git a/test/language/pythonLanguage.test.ts b/test/language/pythonLanguage.test.ts index a031111d..fd8c67d8 100644 --- a/test/language/pythonLanguage.test.ts +++ b/test/language/pythonLanguage.test.ts @@ -1,19 +1,19 @@ import assert from 'assert' -import { toRegexStep } from '../../src/language/pythonLanguage.js' +import { toStringOrRegExp } from '../../src/language/pythonLanguage.js' describe('pythonLanguage', () => { it('should identify and return regexes correctly', () => { - //NOTE these are strings that would look like from tree-sitter + // NOTE these are strings that would look like from tree-sitter const regexes = ['"Something (.*)"', '"Catch them digits \\d+"'] regexes.forEach(function (regex) { - assert(toRegexStep(regex) instanceof RegExp) + assert(toStringOrRegExp(regex) instanceof RegExp) }) }) it('should identify normal strings and just return a string', () => { const nonregexes = ['"test"'] nonregexes.forEach(function (nonregex) { - assert(toRegexStep(nonregex) == 'test') + assert(toStringOrRegExp(nonregex) == 'test') }) }) })