From 787d4b765b0ca4e1e50908d99497360fc4b228b5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Mit=C3=A1=C5=A1?= Date: Wed, 7 Feb 2024 11:44:39 +0100 Subject: [PATCH] Introduce an overall limit to link. ref. defs instantiations. This is to prevent time and output size explosion in case of input pattern generated by this: $ python -c 'N=1000; print("[x]: " + "x" * N + "\n[x]" * N)' We roughly allow link reference definitions to blow up the input size of the document 16 times, or up to 1 MB, whichever is smaller. When the threshold is reached, any following link references are left unresolved and sent to the output as plain text. Fixes #238. --- src/md4c.c | 23 +++++++++++++++++++++-- test/pathological-tests.py | 5 ++++- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/src/md4c.c b/src/md4c.c index 30388632..4a4e4012 100644 --- a/src/md4c.c +++ b/src/md4c.c @@ -26,6 +26,7 @@ #include "md4c.h" #include <limits.h> +#include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -143,6 +144,9 @@ #define SZ MD_SIZE #define OFF MD_OFFSET +#define SZ_MAX (sizeof(SZ) == 8 ? UINT64_MAX : UINT32_MAX) +#define OFF_MAX (sizeof(OFF) == 8 ? UINT64_MAX : UINT32_MAX) + typedef struct MD_MARK_tag MD_MARK; typedef struct MD_BLOCK_tag MD_BLOCK; typedef struct MD_CONTAINER_tag MD_CONTAINER; @@ -180,6 +184,7 @@ struct MD_CTX_tag { int alloc_ref_defs; void** ref_def_hashtable; int ref_def_hashtable_size; + SZ max_ref_def_output; /* Stack of inline/span markers. * This is only used for parsing a single block contents but by storing it @@ -2283,11 +2288,14 @@ md_is_link_reference(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, int is_multiline; CHAR* label; SZ label_size; - int ret; + int ret = FALSE; MD_ASSERT(CH(beg) == _T('[') || CH(beg) == _T('!')); MD_ASSERT(CH(end-1) == _T(']')); + if(ctx->max_ref_def_output == 0) + return FALSE; + beg += (CH(beg) == _T('!') ? 
2 : 1); end--; @@ -2315,7 +2323,17 @@ md_is_link_reference(MD_CTX* ctx, const MD_LINE* lines, MD_SIZE n_lines, if(is_multiline) free(label); - ret = (def != NULL); + if(def != NULL) { + /* See https://github.com/mity/md4c/issues/238 */ + MD_SIZE output_size_estimation = def->label_size + def->title_size + def->dest_end - def->dest_beg; + if(output_size_estimation < ctx->max_ref_def_output) { + ctx->max_ref_def_output -= output_size_estimation; + ret = TRUE; + } else { + MD_LOG("Too many link reference definition instantiations."); + ctx->max_ref_def_output = 0; + } + } abort: return ret; @@ -6470,6 +6488,7 @@ md_parse(const MD_CHAR* text, MD_SIZE size, const MD_PARSER* parser, void* userd ctx.code_indent_offset = (ctx.parser.flags & MD_FLAG_NOINDENTEDCODEBLOCKS) ? (OFF)(-1) : 4; md_build_mark_char_map(&ctx); ctx.doc_ends_with_newline = (size > 0 && ISNEWLINE_(text[size-1])); + ctx.max_ref_def_output = MIN(MIN(16 * (uint64_t)size, (uint64_t)(1024 * 1024)), (uint64_t)SZ_MAX); /* Reset all mark stacks and lists. */ for(i = 0; i < (int) SIZEOF_ARRAY(ctx.opener_stacks); i++) diff --git a/test/pathological-tests.py b/test/pathological-tests.py index 924cbe97..66b85ad9 100644 --- a/test/pathological-tests.py +++ b/test/pathological-tests.py @@ -102,7 +102,10 @@ "--ftables"), "many broken links": (("]([\n" * 50000), - re.compile("

<p>(\]\(\[\r?\n){49999}\]\(\[</p>

")) + re.compile("

<p>(\]\(\[\r?\n){49999}\]\(\[</p>

")), + "many link ref. def. instantiations": + (("[x]: " + "x" * 50000 + "\n[x]" * 50000), + re.compile("")) } whitespace_re = re.compile('/s+/')