Skip to content

Commit

Permalink
fix: trailing blank lines are part of fenced code block content (#2)
Browse files Browse the repository at this point in the history
  • Loading branch information
ikatyang authored Oct 14, 2019
1 parent 850a9ec commit 39daf72
Show file tree
Hide file tree
Showing 2 changed files with 128 additions and 25 deletions.
81 changes: 81 additions & 0 deletions corpus/custom.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,84 @@
================================================================================
Fenced code block - trailing blank lines are considered part of its content
================================================================================
- ```


- ~~~


- ```


- ~~~


- ```


- ~~~


-
--------------------------------------------------------------------------------

(document
(tight_list
(list_item
(list_marker)
(fenced_code_block
(code_fence_content
(line_break))))
(list_item
(list_marker)
(fenced_code_block
(code_fence_content
(line_break))))
(list_item
(list_marker)
(fenced_code_block
(code_fence_content
(virtual_space)
(virtual_space)
(line_break)
(virtual_space)
(virtual_space))))
(list_item
(list_marker)
(fenced_code_block
(code_fence_content
(virtual_space)
(virtual_space)
(line_break)
(virtual_space)
(virtual_space))))
(list_item
(list_marker)
(fenced_code_block
(code_fence_content
(line_break))))
(list_item
(list_marker)
(fenced_code_block
(code_fence_content
(line_break))))
(list_item
(list_marker))))

================================================================================
Fenced code block - EOF-only newlines are NOT considered part of its content
================================================================================
- ```


--------------------------------------------------------------------------------

(document
(tight_list
(list_item
(list_marker)
(fenced_code_block))))

================================================================================
Table (extension) - too much indentation in list item [is NOT table]
================================================================================
Expand Down
72 changes: 47 additions & 25 deletions src/tree_sitter_markdown/block_scan.cc
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,11 @@ void scn_blk(Lexer &lxr, BlockDelimiterList &blk_dlms, const BlockContextStack &
: cur_ind >= ind && cur_ind - ind < 4);
if (is_pas_all_blk_ctx && !is_eol_chr(lxr.lka_chr()) && scn_tbl_dlm_row(lxr, tbl_col_cnt)) {
is_tbl = true;
tmp_blk_dlms.push_back(BlockDelimiter(SYM_TBL_HED_ROW_BGN_MKR, bgn_pos, bgn_pos));
tmp_blk_dlms.push_back(BlockDelimiter(SYM_TBL_HED_ROW_BGN_MKR, 0));
}
}

if (!is_tbl) tmp_blk_dlms.push_back(BlockDelimiter(SYM_PGH_BGN_MKR, bgn_pos, bgn_pos));
if (!is_tbl) tmp_blk_dlms.push_back(BlockDelimiter(SYM_PGH_BGN_MKR, 0));
}
assert(!tmp_blk_dlms.empty());
push_lst_nod_mkr_if_necessary(blk_dlms, tmp_blk_dlms.front(), ind, blk_ctx_stk.empty() ? SYM_NOT_FOUND : blk_ctx_stk.back().sym());
Expand All @@ -102,14 +102,14 @@ bool /*is_interrupted*/ scn_eol(Lexer &lxr, BlockDelimiterList &blk_dlms, BlockC
bool is_pas_all_blk_ctx = ctx_itr == ctx_end;
if (!is_pas_all_blk_ctx || is_eol_chr(lxr.lka_chr())) {
// disallow two consecutive blank lines at the beginning of a list item
blk_dlms.push_back(BlockDelimiter(SYM_LST_ITM_CNT_END_MKR, bgn_pos, bgn_pos));
blk_dlms.push_back(BlockDelimiter(SYM_LST_ITM_END_MKR, bgn_pos, bgn_pos));
blk_dlms.push_back(BlockDelimiter(SYM_LST_ITM_CNT_END_MKR, 0));
blk_dlms.push_back(BlockDelimiter(SYM_LST_ITM_END_MKR, 0));
}
}
return true;
}
if (ctx_sym == SYM_TBL_HED_ROW_BGN_MKR) {
blk_dlms.push_back(BlockDelimiter(SYM_TBL_ROW_END_MKR, bgn_pos, bgn_pos));
blk_dlms.push_back(BlockDelimiter(SYM_TBL_ROW_END_MKR, 0));
lxr.adv_if('\r');
lxr.adv_if('\n');
BlockContextStack::ConstIterator ctx_itr = blk_ctx_stk.begin();
Expand All @@ -127,12 +127,12 @@ bool /*is_interrupted*/ scn_eol(Lexer &lxr, BlockDelimiterList &blk_dlms, BlockC
BlockContextStack::ConstReverseIterator itr = blk_ctx_stk.rbegin(), end = blk_ctx_stk.rend();
itr != end;
itr++
) blk_dlms.push_back(BlockDelimiter(get_blk_cls_sym(itr->sym()), bgn_pos, bgn_pos));
) blk_dlms.push_back(BlockDelimiter(get_blk_cls_sym(itr->sym()), 0));
return true;
}

if (!blk_ctx_stk.empty() && blk_ctx_stk.back().sym() == SYM_ATX_BGN) {
blk_dlms.push_back(BlockDelimiter(SYM_ATX_END_MKR, bgn_pos, bgn_pos));
blk_dlms.push_back(BlockDelimiter(SYM_ATX_END_MKR, 0));
return true;
}

Expand All @@ -146,6 +146,18 @@ bool /*is_interrupted*/ scn_eol(Lexer &lxr, BlockDelimiterList &blk_dlms, BlockC
bool has_blk_lbk = false;

for (;;) {
// EOF-only newlines are NOT considered part of the fenced code block content
if (
is_eof_chr(lxr.lka_chr())
&& !blk_ctx_stk.empty()
&& (blk_ctx_stk.back().sym() == SYM_BTK_FEN_COD_BGN || blk_ctx_stk.back().sym() == SYM_TLD_FEN_COD_BGN)
) {
assert(!has_blk_lbk);
has_end_mkr = true;
tmp_blk_dlms.push_back(BlockDelimiter(get_blk_cls_sym(blk_ctx_stk.back().sym()), 0));
break;
}

BlockContextStack::ConstIterator ctx_itr = blk_ctx_stk.begin();
const BlockContextStack::ConstIterator ctx_end_itr = blk_ctx_stk.end();
LexedPosition lst_non_wsp_end_pos;
Expand All @@ -169,7 +181,7 @@ bool /*is_interrupted*/ scn_eol(Lexer &lxr, BlockDelimiterList &blk_dlms, BlockC
) {
assert(!has_blk_lbk);
has_end_mkr = true;
tmp_blk_dlms.push_back(BlockDelimiter(get_blk_cls_sym(blk_ctx_stk.back().sym()), bgn_pos, bgn_pos));
tmp_blk_dlms.push_back(BlockDelimiter(get_blk_cls_sym(blk_ctx_stk.back().sym()), 0));
break;
}
bool is_vtr_spc_sensitive = blk_ctx_stk.back().sym() == SYM_IND_COD_BGN_MKR
Expand All @@ -185,6 +197,9 @@ bool /*is_interrupted*/ scn_eol(Lexer &lxr, BlockDelimiterList &blk_dlms, BlockC
has_blk_lbk = true;
tmp_blk_dlms.push_back(BlockDelimiter(SYM_BNK_LBK, lst_bgn_pos.dist(lst_non_wsp_end_pos) + ind_chr_cnt));
tmp_blk_dlms.push_vtr_spc(vrt_spc_cnt);
if (blk_ctx_stk.back().sym() == SYM_BTK_FEN_COD_BGN || blk_ctx_stk.back().sym() == SYM_TLD_FEN_COD_BGN) {
break;
}
} else if (is_pas_all_blk_ctx) {
if (blk_ctx_stk.empty() || blk_ctx_stk.back().sym() == SYM_BQT_BGN) {
assert(!has_blk_lbk);
Expand Down Expand Up @@ -251,7 +266,7 @@ bool /*is_interrupted*/ scn_eol(Lexer &lxr, BlockDelimiterList &blk_dlms, BlockC
has_blk_lbk = false;
tmp_blk_dlms.clear();
has_end_mkr = true;
tmp_blk_dlms.push_back(BlockDelimiter(SYM_LST_END_MKR, bgn_pos, bgn_pos));
tmp_blk_dlms.push_back(BlockDelimiter(SYM_LST_END_MKR, 0));
}
break;
}
Expand All @@ -261,7 +276,7 @@ bool /*is_interrupted*/ scn_eol(Lexer &lxr, BlockDelimiterList &blk_dlms, BlockC
has_blk_lbk = false;
tmp_blk_dlms.clear();
has_end_mkr = true;
tmp_blk_dlms.push_back(BlockDelimiter(SYM_LST_END_MKR, bgn_pos, bgn_pos));
tmp_blk_dlms.push_back(BlockDelimiter(SYM_LST_END_MKR, 0));
}
break;
}
Expand All @@ -271,7 +286,7 @@ bool /*is_interrupted*/ scn_eol(Lexer &lxr, BlockDelimiterList &blk_dlms, BlockC
has_blk_lbk = false;
tmp_blk_dlms.clear();
has_end_mkr = true;
tmp_blk_dlms.push_back(BlockDelimiter(SYM_LST_END_MKR, bgn_pos, bgn_pos));
tmp_blk_dlms.push_back(BlockDelimiter(SYM_LST_END_MKR, 0));
}
break;
}
Expand All @@ -281,7 +296,7 @@ bool /*is_interrupted*/ scn_eol(Lexer &lxr, BlockDelimiterList &blk_dlms, BlockC
has_blk_lbk = false;
tmp_blk_dlms.clear();
has_end_mkr = true;
tmp_blk_dlms.push_back(BlockDelimiter(SYM_LST_END_MKR, bgn_pos, bgn_pos));
tmp_blk_dlms.push_back(BlockDelimiter(SYM_LST_END_MKR, 0));
}
break;
}
Expand All @@ -291,7 +306,7 @@ bool /*is_interrupted*/ scn_eol(Lexer &lxr, BlockDelimiterList &blk_dlms, BlockC
has_blk_lbk = false;
tmp_blk_dlms.clear();
has_end_mkr = true;
tmp_blk_dlms.push_back(BlockDelimiter(SYM_LST_END_MKR, bgn_pos, bgn_pos));
tmp_blk_dlms.push_back(BlockDelimiter(SYM_LST_END_MKR, 0));
}
break;
}
Expand All @@ -305,15 +320,15 @@ bool /*is_interrupted*/ scn_eol(Lexer &lxr, BlockDelimiterList &blk_dlms, BlockC
if (BSR_ACCEPT == scn_blk_nod(lxr, tmp_blk_dlms, cur_ind, is_pas_all_blk_ctx, is_pgh_cont_ln)) {
has_opn_mkr = true;
has_end_mkr = true;
blk_dlms.push_back(BlockDelimiter(get_blk_cls_sym(blk_ctx_stk.back().sym()), bgn_pos, bgn_pos));
blk_dlms.push_back(BlockDelimiter(get_blk_cls_sym(blk_ctx_stk.back().sym()), 0));
blk_dlms.push_back(BlockDelimiter(SYM_LIT_LBK, bgn_pos, lst_non_wsp_end_pos));
BlockContextStack::ConstReverseIterator lst_blk_ctx_itr = ++blk_ctx_stk.rbegin();
Symbol lst_blk_ctx_sym = lst_blk_ctx_itr == blk_ctx_stk.rend() ? SYM_NOT_FOUND : lst_blk_ctx_itr->sym();
push_lst_nod_mkr_if_necessary(blk_dlms, tmp_blk_dlms.front(), cur_ind, lst_blk_ctx_sym);
} else if (is_pgh_cont_ln) {
tmp_blk_dlms.push_back(BlockDelimiter(SYM_LIT_LBK, bgn_pos, lst_non_wsp_end_pos));
} else {
blk_dlms.push_back(BlockDelimiter(get_blk_cls_sym(blk_ctx_stk.back().sym()), bgn_pos, bgn_pos));
blk_dlms.push_back(BlockDelimiter(get_blk_cls_sym(blk_ctx_stk.back().sym()), 0));
tmp_blk_dlms.push_back(BlockDelimiter(SYM_LIT_LBK, bgn_pos, lst_non_wsp_end_pos));
tmp_blk_dlms.push_back(BlockDelimiter(SYM_TBL_DAT_ROW_BGN_MKR, 0));
}
Expand All @@ -339,7 +354,7 @@ bool /*is_interrupted*/ scn_eol(Lexer &lxr, BlockDelimiterList &blk_dlms, BlockC
cur_ctx_itr++
) {
has_end_mkr = true;
tmp_blk_dlms.push_back(BlockDelimiter(get_blk_cls_sym(cur_ctx_itr->sym()), bgn_pos, bgn_pos));
tmp_blk_dlms.push_back(BlockDelimiter(get_blk_cls_sym(cur_ctx_itr->sym()), 0));
if (&(*cur_ctx_itr) == fst_bqt_ctx) break;
}
break;
Expand All @@ -353,17 +368,24 @@ bool /*is_interrupted*/ scn_eol(Lexer &lxr, BlockDelimiterList &blk_dlms, BlockC
) {
assert(!has_blk_lbk);
has_end_mkr = true;
tmp_blk_dlms.push_back(BlockDelimiter(get_blk_cls_sym(blk_ctx_stk.back().sym()), bgn_pos, bgn_pos));
tmp_blk_dlms.push_back(BlockDelimiter(get_blk_cls_sym(blk_ctx_stk.back().sym()), 0));
break;
}
has_blk_lbk = true;
tmp_blk_dlms.push_back(BlockDelimiter(SYM_BNK_LBK, lst_bgn_pos, lxr.cur_pos()));
if (blk_ctx_stk.back().sym() == SYM_BTK_FEN_COD_BGN || blk_ctx_stk.back().sym() == SYM_TLD_FEN_COD_BGN) {
break;
}
} else {
assert(!blk_ctx_stk.empty());
if (blk_ctx_stk.back().sym() == SYM_BTK_FEN_COD_BGN || blk_ctx_stk.back().sym() == SYM_TLD_FEN_COD_BGN) {
assert(!has_blk_lbk);
has_end_mkr = true;
tmp_blk_dlms.push_back(BlockDelimiter(get_blk_cls_sym(blk_ctx_stk.back().sym()), 0));
break;
}
if (
blk_ctx_stk.back().sym() == SYM_IND_COD_BGN_MKR
|| blk_ctx_stk.back().sym() == SYM_BTK_FEN_COD_BGN
|| blk_ctx_stk.back().sym() == SYM_TLD_FEN_COD_BGN
|| blk_ctx_stk.back().sym() == SYM_HTM_BLK_SCR_BGN
|| blk_ctx_stk.back().sym() == SYM_HTM_BLK_CMT_BGN
|| blk_ctx_stk.back().sym() == SYM_HTM_BLK_PRC_BGN
Expand All @@ -384,9 +406,9 @@ bool /*is_interrupted*/ scn_eol(Lexer &lxr, BlockDelimiterList &blk_dlms, BlockC
has_blk_lbk = false;
tmp_blk_dlms.clear();
has_end_mkr = true;
tmp_blk_dlms.push_back(BlockDelimiter(get_blk_cls_sym(blk_ctx_stk.back().sym()), bgn_pos, bgn_pos));
tmp_blk_dlms.push_back(BlockDelimiter(get_blk_cls_sym(blk_ctx_stk.back().sym()), 0));
if (tmp_blk_dlms.back().sym() == SYM_LST_ITM_CNT_END_MKR) {
tmp_blk_dlms.push_back(BlockDelimiter(SYM_LST_ITM_END_MKR, bgn_pos, bgn_pos));
tmp_blk_dlms.push_back(BlockDelimiter(SYM_LST_ITM_END_MKR, 0));
}
break;
}
Expand All @@ -396,12 +418,12 @@ bool /*is_interrupted*/ scn_eol(Lexer &lxr, BlockDelimiterList &blk_dlms, BlockC
BlockContextStack::ConstReverseIterator cur_ctx_itr = blk_ctx_stk.rbegin();
for (;; cur_ctx_itr++) {
has_end_mkr = true;
blk_dlms.push_back(BlockDelimiter(get_blk_cls_sym(cur_ctx_itr->sym()), bgn_pos, bgn_pos));
blk_dlms.push_back(BlockDelimiter(get_blk_cls_sym(cur_ctx_itr->sym()), 0));
if (&(*cur_ctx_itr) == &(*fst_failed_ctx_itr)) break;
}
if (blk_dlms.back().sym() == SYM_LST_ITM_CNT_END_MKR) {
cur_ctx_itr++;
blk_dlms.push_back(BlockDelimiter(SYM_LST_ITM_END_MKR, bgn_pos, bgn_pos));
blk_dlms.push_back(BlockDelimiter(SYM_LST_ITM_END_MKR, 0));
}
cur_ctx_itr++;
if (
Expand All @@ -415,7 +437,7 @@ bool /*is_interrupted*/ scn_eol(Lexer &lxr, BlockDelimiterList &blk_dlms, BlockC
)
) {
cur_ctx_itr++;
blk_dlms.push_back(BlockDelimiter(SYM_LST_END_MKR, bgn_pos, bgn_pos));
blk_dlms.push_back(BlockDelimiter(SYM_LST_END_MKR, 0));
}
blk_dlms.push_back(BlockDelimiter(SYM_LIT_LBK, bgn_pos, lst_non_wsp_end_pos));
const BlockContextStack::ConstReverseIterator lst_blk_ctx_itr = cur_ctx_itr;
Expand All @@ -441,7 +463,7 @@ bool /*is_interrupted*/ scn_eol(Lexer &lxr, BlockDelimiterList &blk_dlms, BlockC
BlockContextStack::ConstReverseIterator itr = blk_ctx_stk.rbegin(), end = blk_ctx_stk.rend();
itr != end;
itr++, has_end_mkr = true
) tmp_blk_dlms.push_back(BlockDelimiter(get_blk_cls_sym(itr->sym()), bgn_pos, bgn_pos));
) tmp_blk_dlms.push_back(BlockDelimiter(get_blk_cls_sym(itr->sym()), 0));
break;
}

Expand Down

0 comments on commit 39daf72

Please sign in to comment.