From beb97a9dcb640876ef4f84e4ddb4616f6ec13db5 Mon Sep 17 00:00:00 2001 From: Dirk Pranke Date: Sun, 8 May 2022 18:42:02 -0700 Subject: [PATCH] Fix issue #47: syntax error points to wrong position ... Due to the way the parser was handling nested parts of the JSON5 grammar, we would incorrectly report errors as having happened at the start of a block of content, rather than where the error actually happened. A simple case of this would be reporting the error in `"foo"bar` as happening at column 1 rather than column 5. The bug report (#47) has a more complicated and realistic example. To fix this we needed to rev the underlying parser generator (glop) to v0.6.4 and also rewrite the JSON5 grammar to use a positive lookahead. --- json5/json5.g | 2 +- json5/parser.py | 18 ++++++++++++++---- tests/lib_test.py | 15 +++++++++++++++ 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/json5/json5.g b/json5/json5.g index 1bbb798..a33ccd2 100644 --- a/json5/json5.g +++ b/json5/json5.g @@ -4,7 +4,7 @@ sp = ws* ws = '\u0020' | eol | comment | '\u0009' | '\u000B' | '\u000C' | '\u00A0' | '\uFEFF' - | anything:x ?( is_unicat(x, 'Zs') ) -> x + | ~~(anything:x ?( is_unicat(x, 'Zs') )) anything:x -> x eol = '\u000D' '\u000A' | '\u000D' | '\u000A' | '\u2028' | '\u2029' diff --git a/json5/parser.py b/json5/parser.py index a2a0039..86bc194 100644 --- a/json5/parser.py +++ b/json5/parser.py @@ -70,11 +70,13 @@ def _bind(self, rule, var): def _not(self, rule): p = self.pos + errpos = self.errpos rule() if self.failed: self._succeed(None, p) else: self._rewind(p) + self.errpos = errpos self._fail() def _opt(self, rule): @@ -100,8 +102,6 @@ def _star(self, rule, vs=None): rule() if self.failed: self._rewind(p) - if p < self.errpos: - self.errpos = p break else: vs.append(self.val) @@ -200,11 +200,21 @@ def _ws__c7_(self): def _ws__c8_(self): self._push('ws__c8') - self._seq([lambda: self._bind(self._anything_, 'x'), self._ws__c8__s1_, + self._seq([self._ws__c8__s0_, lambda: 
self._bind(self._anything_, 'x'), lambda: self._succeed(self._get('x'))]) self._pop('ws__c8') - def _ws__c8__s1_(self): + def _ws__c8__s0_(self): + self._not(lambda: self._not(self._ws__c8__s0_n_n_)) + + def _ws__c8__s0_n_n_(self): + (lambda: self._choose([self._ws__c8__s0_n_n_g__c0_]))() + + def _ws__c8__s0_n_n_g__c0_(self): + self._seq([lambda: self._bind(self._anything_, 'x'), + self._ws__c8__s0_n_n_g__c0__s1_]) + + def _ws__c8__s0_n_n_g__c0__s1_(self): v = self._is_unicat(self._get('x'), 'Zs') if v: self._succeed(v) diff --git a/tests/lib_test.py b/tests/lib_test.py index f978650..a6d2473 100644 --- a/tests/lib_test.py +++ b/tests/lib_test.py @@ -252,6 +252,21 @@ def test_whitespace(self): self.check(u'\u2028 1', 1) self.check(u'\u2029 1', 1) + def test_error_reporting(self): + self.check_fail('[ ,]', + err=':1 Unexpected "," at column 3') + + self.check_fail( + '{\n' + ' version: "1.0",\n' + ' author: "John Smith",\n' + ' people : [\n' + ' "Monty",\n' + ' "Python"foo\n' + ' ]\n' + '}\n', + err=':6 Unexpected "f" at column 17') + class TestDump(unittest.TestCase): def test_basic(self):