From 3d67064f2d98e1ffac9cf9a675634453404785e9 Mon Sep 17 00:00:00 2001 From: Ika Date: Sun, 24 May 2020 15:39:53 +0800 Subject: [PATCH] feat: support super long words Input: (word that is longer than 255 chars without any whitespace) Before: (assertion error) After: (parsed successfully) --- corpus/custom.txt | 25 ++++++++++++++++++++ src/tree_sitter_markdown/inline_delimiter.cc | 20 +++++++++++++--- 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/corpus/custom.txt b/corpus/custom.txt index 71426f1..dbf012c 100644 --- a/corpus/custom.txt +++ b/corpus/custom.txt @@ -923,3 +923,28 @@ Empty Image Description (document (paragraph (text))) + +================================================================================ +Super Long Word +================================================================================ +12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890 + +-------------------------------------------------------------------------------- + +(document + (paragraph + (text))) + +================================================================================ +Super Long Extended Autolinks +================================================================================ +foo 
www.12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890.com/12345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890 bar + +-------------------------------------------------------------------------------- + +(document + (paragraph + (text) + (www_autolink + (text)) + (text))) diff --git a/src/tree_sitter_markdown/inline_delimiter.cc b/src/tree_sitter_markdown/inline_delimiter.cc index eff2c44..daa2add 100644 --- a/src/tree_sitter_markdown/inline_delimiter.cc +++ b/src/tree_sitter_markdown/inline_delimiter.cc @@ -1,7 +1,8 @@ #include "./inline_delimiter.h" - #include "./predicate.h" +#define MAX_INL_DLM_LEN 0b11111111 + namespace tree_sitter_markdown { bool MinimizedInlineDelimiter::yes() const { return yes_; } @@ -14,7 +15,7 @@ MinimizedInlineDelimiter::MinimizedInlineDelimiter(const bool yes, const Symbol unsigned MinimizedInlineDelimiter::serialize(unsigned char *buffer) const { assert(is_inl_sym(sym_)); assert(sym_ <= 0b1111111); - assert(len_ <= 0b11111111); + assert(len_ <= MAX_INL_DLM_LEN); buffer[0] = (sym_ << 1) | yes_; buffer[1] = len_; return 2; @@ -231,7 +232,20 @@ 
InlineDelimiterList::Iterator InlineDelimiterList::erase(Iterator itr, Iterator void InlineDelimiterList::transfer_to(MinimizedInlineDelimiterList &minimized_list) { while (!empty()) { - minimized_list.push_back(front().to_min()); + const MinimizedInlineDelimiter &inl_dlm = front().to_min(); + if (inl_dlm.len() <= MAX_INL_DLM_LEN) { + minimized_list.push_back(inl_dlm); + } else { + // split SYM_EXT_AUT_LNK_BGN/SYM_EXT_AUT_LNK_CTN into multiple parts to bypass length limit for inline delimiters + assert(inl_dlm.sym() == SYM_EXT_AUT_LNK_BGN || inl_dlm.sym() == SYM_EXT_AUT_LNK_CTN); + minimized_list.push_back(MinimizedInlineDelimiter(inl_dlm.yes(), inl_dlm.sym(), MAX_INL_DLM_LEN)); + LexedLength rst_len = inl_dlm.len() - MAX_INL_DLM_LEN; + while (rst_len > 0) { + LexedLength len = rst_len > MAX_INL_DLM_LEN ? MAX_INL_DLM_LEN : rst_len; + minimized_list.push_back(MinimizedInlineDelimiter(inl_dlm.yes(), SYM_EXT_AUT_LNK_CTN, len)); + rst_len -= len; + } + } pop_front(); } }