Skip to content

Commit

Permalink
block containers: fix parsing of trailing whitespace
Browse files Browse the repository at this point in the history
re #475
  • Loading branch information
biojppm committed Jan 18, 2025
1 parent 8abfacd commit 06645a8
Show file tree
Hide file tree
Showing 5 changed files with 192 additions and 33 deletions.
1 change: 1 addition & 0 deletions changelog/current.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
- [PR#488](https://github.com/biojppm/rapidyaml/pull/488):
- add workarounds for problems with codegen of gcc 11,12,13
- improve CI coverage of gcc and clang optimization levels
- Fix [#475](https://github.com/biojppm/rapidyaml/issues/475): parse error on trailing whitespace in block containers
2 changes: 1 addition & 1 deletion ext/c4core
Submodule c4core updated 0 files
92 changes: 65 additions & 27 deletions src/c4/yml/parse_engine.def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5813,43 +5813,63 @@ void ParseEngine<EventHandler>::_handle_seq_block()
// handle indentation
//
_c4dbgpf("seqblck[RNXT]: indref={} indentation={}", m_evt_handler->m_curr->indref, m_evt_handler->m_curr->line_contents.indentation);
if(C4_UNLIKELY(!_at_line_begin()))
_c4err("parse error");
if(m_evt_handler->m_curr->indentation_ge())
{
_c4dbgpf("seqblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
_line_progressed(m_evt_handler->m_curr->indref);
_maybe_skip_whitespace_tokens();
rem = m_evt_handler->m_curr->line_contents.rem;
if(!rem.len)
goto seqblck_again;
}
else if(m_evt_handler->m_curr->indentation_lt())
if(C4_LIKELY(_at_line_begin()))
{
_c4dbgp("seqblck[RNXT]: smaller indentation!");
_handle_indentation_pop_from_block_seq();
if(has_all(RSEQ|BLCK))
_c4dbgp("seqblck[RNXT]: at line begin");
if(m_evt_handler->m_curr->indentation_ge())
{
_c4dbgp("seqblck[RNXT]: still seqblck!");
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
_line_progressed(m_evt_handler->m_curr->line_contents.indentation);
_c4dbgpf("seqblck[RNXT]: skip {} from indref", m_evt_handler->m_curr->indref);
_line_progressed(m_evt_handler->m_curr->indref);
_maybe_skip_whitespace_tokens();
rem = m_evt_handler->m_curr->line_contents.rem;
if(!rem.len)
goto seqblck_again;
}
else
else if(m_evt_handler->m_curr->indentation_lt())
{
_c4dbgp("seqblck[RNXT]: no longer seqblck!");
goto seqblck_finish;
_c4dbgp("seqblck[RNXT]: smaller indentation!");
_handle_indentation_pop_from_block_seq();
if(has_all(RSEQ|BLCK))
{
_c4dbgp("seqblck[RNXT]: still seqblck!");
_RYML_CB_ASSERT(m_evt_handler->m_stack.m_callbacks, has_any(RNXT));
_line_progressed(m_evt_handler->m_curr->line_contents.indentation);
rem = m_evt_handler->m_curr->line_contents.rem;
if(!rem.len)
goto seqblck_again;

Check warning on line 5839 in src/c4/yml/parse_engine.def.hpp

View check run for this annotation

Codecov / codecov/patch

src/c4/yml/parse_engine.def.hpp#L5839

Added line #L5839 was not covered by tests
}
else
{
_c4dbgp("seqblck[RNXT]: no longer seqblck!");
goto seqblck_finish;
}
}
else if(m_evt_handler->m_curr->line_contents.indentation == npos)
{
_c4dbgpf("seqblck[RNXT]: blank line, len={}", m_evt_handler->m_curr->line_contents.rem);
_line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
rem = m_evt_handler->m_curr->line_contents.rem;
if(!rem.len)
goto seqblck_again;
}
}
else if(m_evt_handler->m_curr->line_contents.indentation == npos)
else
{
_c4dbgpf("seqblck[RNXT]: blank line, len={}", m_evt_handler->m_curr->line_contents.rem);
_line_progressed(m_evt_handler->m_curr->line_contents.rem.len);
rem = m_evt_handler->m_curr->line_contents.rem;
if(!rem.len)
goto seqblck_again;
_c4dbgp("seqblck[RNXT]: NOT at line begin");
if(!rem.begins_with_any(" \t"))
{
_c4err("parse error");
}
else
{
_skipchars(" \t");
rem = m_evt_handler->m_curr->line_contents.rem;
if(!rem.len)
{
_c4dbgp("seqblck[RNXT]: again");
goto seqblck_again;
}
}
}
//
// now handle the tokens
Expand Down Expand Up @@ -6750,6 +6770,24 @@ void ParseEngine<EventHandler>::_handle_map_block()
}
}
}
else
{
    // Fallback branch of the indentation handling: per the debug message,
    // the parser is not positioned at the beginning of the line here.
    // NOTE(review): presumably pairs with an `_at_line_begin()` check that
    // is above the visible part of this function -- confirm.
    _c4dbgp("mapblck[RNXT]: NOT at line begin");
    if(!rem.begins_with_any(" \t"))
    {
        // anything other than a space or tab at this point is rejected
        _c4err("parse error");
    }
    else
    {
        // skip the run of spaces/tabs, then refresh the local view of the
        // remainder of the line
        _skipchars(" \t");
        rem = m_evt_handler->m_curr->line_contents.rem;
        if(!rem.len)
        {
            // nothing but whitespace was left on the line: restart the
            // map-block loop (the tag now names the map handler, matching
            // the message above and the jump target below)
            _c4dbgp("mapblck[RNXT]: again");
            goto mapblck_again;
        }
    }
}
//
// handle tokens
//
Expand Down
111 changes: 111 additions & 0 deletions test/test_github_issues.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,117 @@
namespace c4 {
namespace yml {

// Regression test for GitHub issue #475 (block sequence, trailing space).
// Parses a document whose key `test` holds a block sequence of three flow
// containers separated by comment lines. The embedded YAML comments state
// that the line following each of them ends with a trailing space.
// NOTE(review): trailing whitespace is not visible in this listing --
// confirm against the raw string literal before touching it.
TEST(github, 475_0_space)
{
Tree t;
// the parse must complete without the error callback being triggered
ExpectError::check_success(&t, [&t]{
parse_in_arena(R"(
test:
- {a: 1}
# next line has a trailing space
- {b: 2}
# next line has a trailing space
- [0, {c: 3}]
)", &t);
});
// one child is expected per `-` entry of the sequence
ConstNodeRef test = t["test"];
ASSERT_TRUE(test.is_seq());
ASSERT_EQ(test.num_children(), 3u);
}

// Regression test for GitHub issue #475 (block sequence, trailing space,
// "indented" variant). Parses a document whose key `test` holds a block
// sequence of three flow containers separated by comment lines.
// NOTE(review): presumably the sequence is indented relative to `test:`
// in the raw string, which is what distinguishes this case from
// 475_0_space; leading/trailing whitespace is not visible in this
// listing -- confirm against the raw string literal.
TEST(github, 475_1_space_indented)
{
Tree t;
// the parse must complete without the error callback being triggered
ExpectError::check_success(&t, [&t]{
parse_in_arena(R"(
test:
- {a: 1}
# next line has a trailing space
- {b: 2}
# next line has a trailing space
- [0, {c: 3}]
)", &t);
});
// one child is expected per `-` entry of the sequence
ConstNodeRef test = t["test"];
ASSERT_TRUE(test.is_seq());
ASSERT_EQ(test.num_children(), 3u);
}

// Regression test for GitHub issue #475 (block sequence, trailing tab).
// Parses a document whose key `test` holds a block sequence of three flow
// containers separated by comment lines. The embedded YAML comments state
// that the line following each of them ends with a trailing tab.
// NOTE(review): trailing whitespace is not visible in this listing --
// confirm against the raw string literal before touching it.
TEST(github, 475_2_tab)
{
Tree t;
// the parse must complete without the error callback being triggered
ExpectError::check_success(&t, [&t]{
parse_in_arena(R"(
test:
- {a: 1}
# next line has a trailing tab
- {b: 2}
# next line has a trailing tab
- [0, {c: 3}]
)", &t);
});
// one child is expected per `-` entry of the sequence
ConstNodeRef test = t["test"];
ASSERT_TRUE(test.is_seq());
ASSERT_EQ(test.num_children(), 3u);
}

// Regression test for GitHub issue #475 (block sequence, trailing tab,
// "indented" variant). Parses a document whose key `test` holds a block
// sequence of three flow containers separated by comment lines.
// NOTE(review): presumably the sequence is indented relative to `test:`
// in the raw string, which is what distinguishes this case from
// 475_2_tab; leading/trailing whitespace is not visible in this
// listing -- confirm against the raw string literal.
TEST(github, 475_3_tab_indented)
{
Tree t;
// the parse must complete without the error callback being triggered
ExpectError::check_success(&t, [&t]{
parse_in_arena(R"(
test:
- {a: 1}
# next line has a trailing tab
- {b: 2}
# next line has a trailing tab
- [0, {c: 3}]
)", &t);
});
// one child is expected per `-` entry of the sequence
ConstNodeRef test = t["test"];
ASSERT_TRUE(test.is_seq());
ASSERT_EQ(test.num_children(), 3u);
}

// Regression test for GitHub issue #475 (block map, trailing space).
// Parses a document whose key `test` holds a block map with keys 0, 1, 2
// whose values are flow containers, separated by comment lines. The
// embedded YAML comments state that the line following each of them ends
// with a trailing space.
// NOTE(review): leading/trailing whitespace is not visible in this
// listing -- confirm against the raw string literal before touching it.
TEST(github, 475_4_space_map)
{
Tree t;
// the parse must complete without the error callback being triggered
ExpectError::check_success(&t, [&t]{
parse_in_arena(R"(
test:
0: {a: 1}
# next line has a trailing space
1: {b: 2}
# next line has a trailing space
2: [0, {c: 3}]
)", &t);
});
// one child is expected per key of the nested map
ConstNodeRef test = t["test"];
ASSERT_TRUE(test.is_map());
ASSERT_EQ(test.num_children(), 3u);
}

// Regression test for GitHub issue #475 (block map, trailing tab).
// Parses a document whose key `test` holds a block map with keys 0, 1, 2
// whose values are flow containers, separated by comment lines. The
// embedded YAML comments state that the line following each of them ends
// with a trailing tab.
// NOTE(review): leading/trailing whitespace is not visible in this
// listing -- confirm against the raw string literal before touching it.
TEST(github, 475_5_tab_map)
{
Tree t;
// the parse must complete without the error callback being triggered
ExpectError::check_success(&t, [&t]{
parse_in_arena(R"(
test:
0: {a: 1}
# next line has a trailing tab
1: {b: 2}
# next line has a trailing tab
2: [0, {c: 3}]
)", &t);
});
// one child is expected per key of the nested map
ConstNodeRef test = t["test"];
ASSERT_TRUE(test.is_map());
ASSERT_EQ(test.num_children(), 3u);
}


//-----------------------------------------------------------------------------

TEST(github, 455_0_ok)
{
Tree t;
Expand Down
19 changes: 14 additions & 5 deletions test/test_lib/test_case.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -237,32 +237,41 @@ ExpectError::ExpectError(Tree *tree, Location loc)
c4::yml::Callbacks tcb((void*)this, tree ? m_tree_prev.m_allocate : nullptr, tree ? m_tree_prev.m_free : nullptr, err);
c4::yml::Callbacks gcb((void*)this, m_glob_prev.m_allocate, m_glob_prev.m_free, err);
#endif
_c4dbgp("setting error callback");
if(tree)
{
_c4dbgp("setting error callback: tree");
tree->callbacks(tcb);
}
_c4dbgp("setting error callback: global");
set_callbacks(gcb);
}

/** Undo the callback overrides installed by the constructor: put the
 * tree's previous callbacks back (when a tree was given), then put the
 * previous global callbacks back. */
ExpectError::~ExpectError()
{
    if(m_tree)
    {
        _c4dbgp("resetting error callback: tree");
        m_tree->callbacks(m_tree_prev);
    }
    _c4dbgp("resetting error callback: global");
    // The constructor builds the global override from m_glob_prev, so that
    // is the state to restore here. Restoring m_tree_prev instead would
    // leak the tree's callbacks into the global state whenever the tree
    // was configured with callbacks different from the global ones.
    set_callbacks(m_glob_prev);
    _c4dbgp("resetting error callback");
}

/** Run @p fn and fail the current test if it reports an error.
 *
 * @param tree  tree whose callbacks are overridden while @p fn runs
 *              (forwarded to the ExpectError constructor)
 * @param fn    the code expected to complete without error
 *
 * A scoped ExpectError installs the error-capturing callbacks for the
 * duration of the call. An error surfaces either through the
 * catch/else branch below or through the context's m_got_an_error flag;
 * both are turned into test failures.
 */
void ExpectError::check_success(Tree *tree, std::function<void()> fn)
{
    // no particular location is expected, as no error is expected at all
    Location expected_location = {};
    auto context = ExpectError(tree, expected_location);
    // NOTE(review): C4_IF_EXCEPTIONS_(a, b) presumably expands to `a` when
    // exceptions are enabled and to `b` (the setjmp path) otherwise --
    // confirm against its definition.
    C4_IF_EXCEPTIONS_(try, if(setjmp(s_jmp_env_expect_error) == 0))
    {
        _c4dbgp("check expected success");
        fn();
        _c4dbgp("check expected success: success!");
    }
    C4_IF_EXCEPTIONS_(catch(ExpectedError const&), else)
    {
        FAIL() << "check expected success: failed!";
    }
    // a single fatal check: report the failure once and stop here
    ASSERT_FALSE(context.m_got_an_error);
}

void ExpectError::check_error(Tree const* tree, std::function<void()> fn, Location expected_location)
Expand Down

0 comments on commit 06645a8

Please sign in to comment.