Extract StringUnescaper from ConstExprParser

phpstan · Apr 17, 2023 · 376023a · 376023a
1 parent 5e2f2e0
commit 376023a
Show file tree

Hide file tree

Showing 3 changed files with 105 additions and 92 deletions.
diff --git a/src/Ast/ConstExpr/QuoteAwareConstExprStringNode.php b/src/Ast/ConstExpr/QuoteAwareConstExprStringNode.php
@@ -11,6 +11,7 @@
 use function sprintf;
 use function str_pad;
 use function strlen;
+use const STR_PAD_LEFT;
 
 class QuoteAwareConstExprStringNode implements ConstExprNode
 {
@@ -47,9 +48,10 @@ public function __toString(): string
 		return sprintf('"%s"', $this->escapeDoubleQuotedString());
 	}
 
-	private function escapeDoubleQuotedString() {
+	private function escapeDoubleQuotedString()
+	{
 		$quote = '"';
-		$escaped = addcslashes($this->value, "\n\r\t\f\v$" . $quote . "\\");
+		$escaped = addcslashes($this->value, "\n\r\t\f\v$" . $quote . '\\');
 
 		// Escape control characters and non-UTF-8 characters.
 		// Regex based on https://stackoverflow.com/a/11709412/385378.
@@ -68,10 +70,11 @@ private function escapeDoubleQuotedString() {
             | (?<=[\xF0-\xF4])[\x80-\xBF](?![\x80-\xBF]{2}) # Short 4 byte sequence
             | (?<=[\xF0-\xF4][\x80-\xBF])[\x80-\xBF](?![\x80-\xBF]) # Short 4 byte sequence (2)
         )/x';
-		return preg_replace_callback($regex, function ($matches) {
+		return preg_replace_callback($regex, static function ($matches) {
 			assert(strlen($matches[0]) === 1);
-			$hex = dechex(ord($matches[0]));;
-			return '\\x' . str_pad($hex, 2, '0', \STR_PAD_LEFT);
+			$hex = dechex(ord($matches[0]));
+
+			return '\\x' . str_pad($hex, 2, '0', STR_PAD_LEFT);
 		}, $escaped);
 	}
 

diff --git a/src/Parser/ConstExprParser.php b/src/Parser/ConstExprParser.php
@@ -4,27 +4,12 @@
 
 use PHPStan\PhpDocParser\Ast;
 use PHPStan\PhpDocParser\Lexer\Lexer;
-use function chr;
-use function hexdec;
-use function octdec;
-use function preg_replace_callback;
-use function str_replace;
 use function strtolower;
 use function substr;
 
 class ConstExprParser
 {
 
-	private const REPLACEMENTS = [
-		'\\' => '\\',
-		'n' => "\n",
-		'r' => "\r",
-		't' => "\t",
-		'f' => "\f",
-		'v' => "\v",
-		'e' => "\x1B",
-	];
-
 	/** @var bool */
 	private $unescapeStrings;
 
@@ -56,7 +41,7 @@ public function parse(TokenIterator $tokens, bool $trimStrings = false): Ast\Con
 			$type = $tokens->currentTokenType();
 			if ($trimStrings) {
 				if ($this->unescapeStrings) {
-					$value = self::unescapeString($value);
+					$value = StringUnescaper::unescapeString($value);
 				} else {
 					$value = substr($value, 1, -1);
 				}
@@ -171,75 +156,4 @@ private function parseArrayItem(TokenIterator $tokens): Ast\ConstExpr\ConstExprA
 		return new Ast\ConstExpr\ConstExprArrayItemNode($key, $value);
 	}
 
-	private static function unescapeString(string $string): string
-	{
-		$quote = $string[0];
-
-		if ($quote === '\'') {
-			return str_replace(
-				['\\\\', '\\\''],
-				['\\', '\''],
-				substr($string, 1, -1)
-			);
-		}
-
-		return self::parseEscapeSequences(substr($string, 1, -1), '"');
-	}
-
-	/**
-	 * Implementation based on https://github.com/nikic/PHP-Parser/blob/b0edd4c41111042d43bb45c6c657b2e0db367d9e/lib/PhpParser/Node/Scalar/String_.php#L90-L130
-	 */
-	private static function parseEscapeSequences(string $str, string $quote): string
-	{
-		$str = str_replace('\\' . $quote, $quote, $str);
-
-		return preg_replace_callback(
-			'~\\\\([\\\\nrtfve]|[xX][0-9a-fA-F]{1,2}|[0-7]{1,3}|u\{([0-9a-fA-F]+)\})~',
-			static function ($matches) {
-				$str = $matches[1];
-
-				if (isset(self::REPLACEMENTS[$str])) {
-					return self::REPLACEMENTS[$str];
-				}
-				if ($str[0] === 'x' || $str[0] === 'X') {
-					return chr(hexdec(substr($str, 1)));
-				}
-				if ($str[0] === 'u') {
-					return self::codePointToUtf8(hexdec($matches[2]));
-				}
-
-				return chr(octdec($str));
-			},
-			$str
-		);
-	}
-
-	/**
-	 * Implementation based on https://github.com/nikic/PHP-Parser/blob/b0edd4c41111042d43bb45c6c657b2e0db367d9e/lib/PhpParser/Node/Scalar/String_.php#L132-L154
-	 */
-	private static function codePointToUtf8(int $num): string
-	{
-		if ($num <= 0x7F) {
-			return chr($num);
-		}
-		if ($num <= 0x7FF) {
-			return chr(($num >> 6) + 0xC0)
-				. chr(($num & 0x3F) + 0x80);
-		}
-		if ($num <= 0xFFFF) {
-			return chr(($num >> 12) + 0xE0)
-				. chr((($num >> 6) & 0x3F) + 0x80)
-				. chr(($num & 0x3F) + 0x80);
-		}
-		if ($num <= 0x1FFFFF) {
-			return chr(($num >> 18) + 0xF0)
-				. chr((($num >> 12) & 0x3F) + 0x80)
-				. chr((($num >> 6) & 0x3F) + 0x80)
-				. chr(($num & 0x3F) + 0x80);
-		}
-
-		// Invalid UTF-8 codepoint escape sequence: Codepoint too large
-		return "\xef\xbf\xbd";
-	}
-
 }
diff --git a/src/Parser/StringUnescaper.php b/src/Parser/StringUnescaper.php
@@ -0,0 +1,96 @@
+<?php declare(strict_types = 1);
+
+namespace PHPStan\PhpDocParser\Parser;
+
+use function chr;
+use function hexdec;
+use function octdec;
+use function preg_replace_callback;
+use function str_replace;
+use function substr;
+
+class StringUnescaper
+{
+
+	private const REPLACEMENTS = [
+		'\\' => '\\',
+		'n' => "\n",
+		'r' => "\r",
+		't' => "\t",
+		'f' => "\f",
+		'v' => "\v",
+		'e' => "\x1B",
+	];
+
+	public static function unescapeString(string $string): string
+	{
+		$quote = $string[0];
+
+		if ($quote === '\'') {
+			return str_replace(
+				['\\\\', '\\\''],
+				['\\', '\''],
+				substr($string, 1, -1)
+			);
+		}
+
+		return self::parseEscapeSequences(substr($string, 1, -1), '"');
+	}
+
+	/**
+	 * Implementation based on https://github.com/nikic/PHP-Parser/blob/b0edd4c41111042d43bb45c6c657b2e0db367d9e/lib/PhpParser/Node/Scalar/String_.php#L90-L130
+	 */
+	private static function parseEscapeSequences(string $str, string $quote): string
+	{
+		$str = str_replace('\\' . $quote, $quote, $str);
+
+		return preg_replace_callback(
+			'~\\\\([\\\\nrtfve]|[xX][0-9a-fA-F]{1,2}|[0-7]{1,3}|u\{([0-9a-fA-F]+)\})~',
+			static function ($matches) {
+				$str = $matches[1];
+
+				if (isset(self::REPLACEMENTS[$str])) {
+					return self::REPLACEMENTS[$str];
+				}
+				if ($str[0] === 'x' || $str[0] === 'X') {
+					return chr(hexdec(substr($str, 1)));
+				}
+				if ($str[0] === 'u') {
+					return self::codePointToUtf8(hexdec($matches[2]));
+				}
+
+				return chr(octdec($str));
+			},
+			$str
+		);
+	}
+
+	/**
+	 * Implementation based on https://github.com/nikic/PHP-Parser/blob/b0edd4c41111042d43bb45c6c657b2e0db367d9e/lib/PhpParser/Node/Scalar/String_.php#L132-L154
+	 */
+	private static function codePointToUtf8(int $num): string
+	{
+		if ($num <= 0x7F) {
+			return chr($num);
+		}
+		if ($num <= 0x7FF) {
+			return chr(($num >> 6) + 0xC0)
+				. chr(($num & 0x3F) + 0x80);
+		}
+		if ($num <= 0xFFFF) {
+			return chr(($num >> 12) + 0xE0)
+				. chr((($num >> 6) & 0x3F) + 0x80)
+				. chr(($num & 0x3F) + 0x80);
+		}
+		if ($num <= 0x1FFFFF) {
+			return chr(($num >> 18) + 0xF0)
+				. chr((($num >> 12) & 0x3F) + 0x80)
+				. chr((($num >> 6) & 0x3F) + 0x80)
+				. chr(($num & 0x3F) + 0x80);
+		}
+
+		// Invalid UTF-8 codepoint escape sequence: Codepoint too large
+		return "\xef\xbf\xbd";
+	}
+
+}