diff --git a/lib/PhpParser/Lexer.php b/lib/PhpParser/Lexer.php index 07ec5cac54..012c1f9438 100644 --- a/lib/PhpParser/Lexer.php +++ b/lib/PhpParser/Lexer.php @@ -143,6 +143,16 @@ private function errorMayHaveOccurred() : bool { return true; } + if (PHP_VERSION_ID >= 80000) { + // PHP 8 converts the "bad character" case into a parse error, rather than treating + // it as a lexing warning. To preserve previous behavior, we need to assume that an + // error occurred. + // TODO: We should handle this the same way as PHP 8: Only generate T_BAD_CHARACTER + // token here (for older PHP versions) and leave generation of the actual parse error + // to the parser. This will also save the full token scan on PHP 8 here. + return true; + } + return null !== error_get_last(); }