cucumber · aslakhellesoy · Dec 8, 2022 · Nov 29, 2022 · Nov 28, 2022 · Nov 29, 2022
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/)
 and this project adheres to [Semantic Versioning](http://semver.org/).
 
 ## [Unreleased]
+
+### Fixed
+- Fixed a regression in the python language implementation for regexes [#119](https://github.com/cucumber/language-service/pull/119)
+
 ### Added
 - Added support for JavaScript - [#42](https://github.com/cucumber/language-service/issues/42), [#115](https://github.com/cucumber/language-service/pull/115), [#120](https://github.com/cucumber/language-service/pull/120)
 

diff --git a/src/language/pythonLanguage.ts b/src/language/pythonLanguage.ts
@@ -8,7 +8,7 @@ export const pythonLanguage: Language = {
       case 'string': {
         return stringLiteral(node)
       }
-      case 'concatednated_string': {
+      case 'concatenated_string': {
         return stringLiteral(node)
       }
       case 'identifier': {
@@ -20,18 +20,15 @@ export const pythonLanguage: Language = {
     }
   },
   toParameterTypeRegExps(node: TreeSitterSyntaxNode) {
-    return RegExp(cleanRegex(stringLiteral(node)))
+    return RegExp(cleanRegExp(stringLiteral(node)))
   },
   toStepDefinitionExpression(node: TreeSitterSyntaxNode): StringOrRegExp {
-    // this removes the head and tail apostrophes
-    // remove python named capture groups.
+    // This removes the head and tail apostrophes.
     // TODO: This should be temporary. Python supports
     // a wider array of regex features than javascript
     // a singular way of communicating regex consistent
     // across languages is necessary
-    return isRegex(node.text.slice(1, -1))
-      ? RegExp(cleanRegex(node.text.slice(1, -1).split('?P').join('')))
-      : node.text.slice(1, -1)
+    return toStringOrRegExp(node.text)
   },
   defineParameterTypeQueries: [
     `(call
@@ -56,13 +53,13 @@ export const pythonLanguage: Language = {
   defineStepDefinitionQueries: [
     `
     (decorated_definition
-        (decorator
-            (call
-                function: (identifier) @method
-                arguments: (argument_list (string) @expression)
-            )
+      (decorator
+        (call
+          function: (identifier) @method
+          arguments: (argument_list (string) @expression)
         )
-        (#match? @method "(given|when|then)")
+      )
+      (#match? @method "(given|when|then)")
     ) @root
 `,
   ],
@@ -86,25 +83,32 @@ export const pythonLanguage: Language = {
       # Please convert to use regular expressions, as Behave does not currently support Cucumber Expressions`,
 }
 
-function cleanRegex(regexString: string) {
-  const startsWith = regexString[0]
+function cleanRegExp(regExpString: string): string {
+  const startsWith = regExpString[0]
   switch (startsWith) {
     case '/':
-      return regexString.slice(1, -1)
+      return regExpString.slice(1, -1)
     default:
-      return regexString
+      return regExpString
   }
 }
+export function toStringOrRegExp(step: string): StringOrRegExp {
+  return isRegExp(step.slice(1, -1))
+    ? RegExp(cleanRegExp(step.slice(1, -1).split('?P').join('')))
+    : step.slice(1, -1)
+}
 
-function stringLiteral(node: TreeSitterSyntaxNode) {
+function stringLiteral(node: TreeSitterSyntaxNode): string {
   const isFString = node.text.startsWith('f')
   const cleanWord = isFString ? node.text.slice(1).slice(1, -1) : node.text.slice(1, -1)
   return cleanWord
 }
 
-function isRegex(cleanWord: string) {
+export function isRegExp(cleanWord: string): boolean {
   const startsWithSlash = cleanWord.startsWith('/')
   const namedGroupMatch = /\?P/
+  const specialCharsMatch = /\(|\)|\.|\*|\\|\|/
   const containsNamedGroups = namedGroupMatch.test(cleanWord)
-  return startsWithSlash || containsNamedGroups
+  const containsSpecialChars = specialCharsMatch.test(cleanWord)
+  return startsWithSlash || containsNamedGroups || containsSpecialChars
 }
diff --git a/test/language/pythonLanguage.test.ts b/test/language/pythonLanguage.test.ts
@@ -0,0 +1,19 @@
+import assert from 'assert'
+
+import { toStringOrRegExp } from '../../src/language/pythonLanguage.js'
+
+describe('pythonLanguage', () => {
+  it('should identify and return regexes correctly', () => {
+    // NOTE these are strings that would look like from tree-sitter
+    const regexes = ['"Something (.*)"', '"Catch them digits \\d+"']
+    regexes.forEach(function (regex) {
+      assert(toStringOrRegExp(regex) instanceof RegExp)
+    })
+  })
+  it('should identify normal strings and just return a string', () => {
+    const nonregexes = ['"test"']
+    nonregexes.forEach(function (nonregex) {
+      assert(toStringOrRegExp(nonregex) == 'test')
+    })
+  })
+})