diff --git a/smart-blocks/parsers/markdown.js b/smart-blocks/parsers/markdown.js index 5e441a8e..5d420b58 100644 --- a/smart-blocks/parsers/markdown.js +++ b/smart-blocks/parsers/markdown.js @@ -1,19 +1,67 @@ +/** + * Parses a Markdown string and returns a flat mapping of heading-like keys to their start and end line numbers. + * This includes special handling for: + * - Multiple occurrences of the same top-level heading (e.g. "# Overview[2]") + * - Subheadings (nested headings are joined in the key with additional "#" characters) + * - Frontmatter demarcated by "---" as a special heading: "#---frontmatter---" + * - Code blocks delimited by triple backticks (```), which ignore headings found within + * - Top-level list items treated as sub-blocks under their parent heading + * + * The returned object keys reflect the path of headings (or list items) in the document, and + * each value is an array of two numbers: the starting line and the ending line for that key's content. + * + * @function parse_blocks + * @param {string} markdown - The complete Markdown text to parse. + * @returns {Object.} A mapping of string keys (representing headings or sub-blocks) + * to an array of two numbers indicating the inclusive start and end line indices (1-based) in the Markdown text. + * Example: + * ```json + * { + * "#Top-Level Heading": [1, 6], + * "#Top-Level Heading###Level 3 Heading (Skipping Level 2)": [3, 6], + * "#Another Top-Level Heading": [7, 18], + * // ... + * } + * ``` + */ export function parse_blocks(markdown) { const lines = markdown.split('\n'); + + // The final result object mapping block-keys to line ranges: [start_line, end_line]. const result = {}; + + // Stack tracking the open headings (as objects with `level`, `title`, `key`). const heading_stack = []; + + // Stores the line range ([start, end]) for each heading or sub-block key. const heading_lines = {}; - const heading_counts = {}; // For tracking multiple occurrences of top-level headings - const sub_block_counts = {}; // Map from heading key to counts of sub-blocks under it - const subheading_counts = {}; // For tracking duplicate subheadings under the same parent + + // For tracking multiple occurrences of top-level headings. + const heading_counts = {}; + + // For tracking the count of sub-blocks under a given heading key (used to append "#{n}" to keys). + const sub_block_counts = {}; + + // For tracking duplicate subheadings under the same parent (appended as "#{x}" at the end). + const subheading_counts = {}; + + // Tracks the currently open top-level list item block if any. let current_list_item = null; + + // Tracks the currently open content block if any (non-list content that follows a heading). let current_content_block = null; + + // Whether we are currently in the frontmatter section delimited by "---". let in_frontmatter = false; let frontmatter_started = false; - let root_heading_key = '#'; + + // The root heading key for content that appears before any heading ("#" for top-level). + const root_heading_key = '#'; + + // Whether we are currently in a fenced code block delimited by triple backticks (```). let in_code_block = false; - // Initialize sub_block_counts for root heading + // Initialize sub-block counts for the root heading key. sub_block_counts[root_heading_key] = 0; for (let i = 0; i < lines.length; i++) { @@ -21,49 +69,52 @@ export function parse_blocks(markdown) { const line = lines[i]; const trimmed_line = line.trim(); - // Handle frontmatter + // Handle frontmatter start/end demarcations. if (trimmed_line === '---') { if (!frontmatter_started) { - // Start of frontmatter + // Start of frontmatter. frontmatter_started = true; in_frontmatter = true; heading_lines['#---frontmatter---'] = [line_number, null]; continue; } else if (in_frontmatter) { - // End of frontmatter + // End of frontmatter. in_frontmatter = false; heading_lines['#---frontmatter---'][1] = line_number; continue; } } + // If we are within frontmatter lines, skip further processing. if (in_frontmatter) { - // Do nothing else while in frontmatter continue; } - // Check for code block start/end + // Check for code block start/end using triple backticks. if (trimmed_line.startsWith('```')) { in_code_block = !in_code_block; - // Include the code block line in the content + // Include the code block line as part of the content for whichever block is open. if (!current_content_block) { - // Start content block - let parent_key = heading_stack.length > 0 ? heading_stack[heading_stack.length - 1].key : '#'; - if (parent_key === '#' && !heading_lines[root_heading_key]) { + // Start a new content block (or sub-block) if not already within one. + const parent_key = heading_stack.length > 0 ? heading_stack[heading_stack.length - 1].key : root_heading_key; + + if (parent_key === root_heading_key && !heading_lines[root_heading_key]) { + // If no heading yet, root acts as heading for this code block. heading_lines[root_heading_key] = [line_number, null]; } - if (parent_key === '#') { - // Include code block directly under '#' + + if (parent_key === root_heading_key) { + // Directly under root heading current_content_block = { key: root_heading_key, start_line: line_number }; - // Update the end line of '#' heading + // Possibly update the end line for the root heading key. if (heading_lines[root_heading_key][1] === null || heading_lines[root_heading_key][1] < line_number) { - heading_lines[root_heading_key][1] = null; // Will be set at the end + heading_lines[root_heading_key][1] = null; // Will set proper end later } } else { if (sub_block_counts[parent_key] === undefined) { sub_block_counts[parent_key] = 0; } - sub_block_counts[parent_key] +=1; + sub_block_counts[parent_key] += 1; const n = sub_block_counts[parent_key]; const key = `${parent_key}#{${n}}`; heading_lines[key] = [line_number, null]; @@ -73,13 +124,13 @@ export function parse_blocks(markdown) { continue; } - // Check for headings + // If not in code block, check for headings (lines starting with one or more "#" followed by space). const heading_match = trimmed_line.match(/^(#{1,6})\s*(.+)$/); if (heading_match && !in_code_block) { - const level = heading_match[1].length; + const level = heading_match[1].length; // Number of "#" is the heading level let title = heading_match[2].trim(); - // Pop headings from stack until last level < current level + // Pop headings from stack until the last heading has a smaller level than current. while ( heading_stack.length > 0 && heading_stack[heading_stack.length - 1].level >= level @@ -90,12 +141,16 @@ export function parse_blocks(markdown) { } } - // Close root heading if open - if (heading_stack.length === 0 && heading_lines[root_heading_key] && heading_lines[root_heading_key][1] === null) { + // Close the root heading if open and we're now seeing a new top-level heading. + if ( + heading_stack.length === 0 && + heading_lines[root_heading_key] && + heading_lines[root_heading_key][1] === null + ) { heading_lines[root_heading_key][1] = line_number - 1; } - // Close any open content block + // Close any open content block. if (current_content_block) { if (heading_lines[current_content_block.key][1] === null) { heading_lines[current_content_block.key][1] = line_number - 1; @@ -103,7 +158,7 @@ export function parse_blocks(markdown) { current_content_block = null; } - // Close any open list item + // Close any open list item. if (current_list_item) { if (heading_lines[current_list_item.key][1] === null) { heading_lines[current_list_item.key][1] = line_number - 1; @@ -111,63 +166,59 @@ export function parse_blocks(markdown) { current_list_item = null; } - // Determine parent key and parent level + // Determine the parent key based on the current heading stack. let parent_key = ''; let parent_level = 0; if (heading_stack.length > 0) { parent_key = heading_stack[heading_stack.length - 1].key; parent_level = heading_stack[heading_stack.length - 1].level; } else { - parent_key = ''; // No parent key for top-level headings + parent_key = ''; // This is a top-level heading with no parent parent_level = 0; } - // Handle duplicates + // If this is a top-level heading (stack empty), handle duplicates by appending [n]. if (heading_stack.length === 0) { - // This is a top-level heading heading_counts[title] = (heading_counts[title] || 0) + 1; if (heading_counts[title] > 1) { title += `[${heading_counts[title]}]`; } } else { - // Subheading under parent + // Subheading under an existing parent heading; track duplicates similarly. if (!subheading_counts[parent_key]) { subheading_counts[parent_key] = {}; } - subheading_counts[parent_key][title] = (subheading_counts[parent_key][title] || 0) +1; + subheading_counts[parent_key][title] = (subheading_counts[parent_key][title] || 0) + 1; const count = subheading_counts[parent_key][title]; if (count > 1) { title += `#{${count}}`; } } - // Calculate number of '#'s to add based on level difference + // The heading's effective level is the difference in nesting from its parent. const level_diff = level - parent_level; const hashes = '#'.repeat(level_diff); - - // Build current heading key const key = parent_key + hashes + title; - // Initialize line range + // Initialize the heading line range. End line is unknown yet (null). heading_lines[key] = [line_number, null]; - // Initialize sub_block_counts for this heading + // Initialize sub-block count for this heading. sub_block_counts[key] = 0; - // Push to stack + // Push the new heading onto the stack. heading_stack.push({ level, title, key }); continue; } - // Check for top-level list items (no indentation, start with '- ') + // Check for top-level list items (no indentation, starting with "- "). const list_match = line.match(/^(\s*)- (.+)$/); if (list_match && !in_code_block) { const indentation = list_match[1].length; - - // Only process top-level list items (no indentation) + // Only consider unindented lines as top-level list items. if (indentation === 0) { - // Close previous list item if any + // Close any currently open list item. if (current_list_item) { if (heading_lines[current_list_item.key][1] === null) { heading_lines[current_list_item.key][1] = line_number - 1; @@ -175,7 +226,7 @@ export function parse_blocks(markdown) { current_list_item = null; } - // Close any open content block + // Close any open content block. if (current_content_block) { if (heading_lines[current_content_block.key][1] === null) { heading_lines[current_content_block.key][1] = line_number - 1; @@ -183,49 +234,45 @@ export function parse_blocks(markdown) { current_content_block = null; } - // Get current heading key - let parent_key = heading_stack.length > 0 ? heading_stack[heading_stack.length - 1].key : '#'; + // Determine the parent heading key. + let parent_key = heading_stack.length > 0 ? heading_stack[heading_stack.length - 1].key : root_heading_key; - // If parent_key is '#', ensure heading_lines['#'] is initialized - if (parent_key === '#' && !heading_lines[root_heading_key]) { + // If the parent is root, ensure the root heading range is initialized. + if (parent_key === root_heading_key && !heading_lines[root_heading_key]) { heading_lines[root_heading_key] = [line_number, null]; } - // Initialize sub_block_counts for parent_key if undefined + // Increment sub-block count under the parent heading. if (sub_block_counts[parent_key] === undefined) { sub_block_counts[parent_key] = 0; } - - // Increment sub block count sub_block_counts[parent_key] += 1; const n = sub_block_counts[parent_key]; - // Build list item key + // Build a key representing this list item. const key = `${parent_key}#{${n}}`; - - // Initialize line range heading_lines[key] = [line_number, null]; - // Update current list item + // Mark this as the current list item. current_list_item = { key, start_line: line_number }; continue; } - // Indented list items are part of the current list item + // Indented list items are assumed part of the current list item's content. if (current_list_item) { continue; } } - // Handle empty lines: do nothing + // Ignore empty lines. if (trimmed_line === '') { continue; } - // Handle content - // If not in content block, start one + // If none of the above conditions met, we're dealing with normal content lines. + // Create or continue a content block under the parent heading. if (!current_content_block) { - // Close any open list item + // If there's a list item open, close it; this content isn't part of that list item. if (current_list_item) { if (heading_lines[current_list_item.key][1] === null) { heading_lines[current_list_item.key][1] = line_number - 1; @@ -233,50 +280,37 @@ export function parse_blocks(markdown) { current_list_item = null; } - // Get current heading key - let parent_key = heading_stack.length > 0 ? heading_stack[heading_stack.length - 1].key : '#'; + // Determine the parent heading key (or use root if none). + let parent_key = heading_stack.length > 0 ? heading_stack[heading_stack.length - 1].key : root_heading_key; - // If parent_key is '#' and content is not a list item, include line in '#' directly - if (parent_key === '#') { - // Ensure heading_lines['#'] is initialized + // If content is under the root, make sure the root heading range is set. + if (parent_key === root_heading_key) { if (!heading_lines[root_heading_key]) { heading_lines[root_heading_key] = [line_number, null]; } - // Update the end line of '#' heading if (heading_lines[root_heading_key][1] === null || heading_lines[root_heading_key][1] < line_number) { - heading_lines[root_heading_key][1] = null; // Will be set at the end + heading_lines[root_heading_key][1] = null; // Defer finalizing the end line } - - // Update current_content_block to '#' heading current_content_block = { key: root_heading_key, start_line: line_number }; } else { - // Initialize sub_block_counts for parent_key if undefined + // This is a sub-block of the last heading on the stack. if (sub_block_counts[parent_key] === undefined) { sub_block_counts[parent_key] = 0; } - - // Increment sub block count sub_block_counts[parent_key] += 1; const n = sub_block_counts[parent_key]; - - // Build content block key const key = `${parent_key}#{${n}}`; - - // Initialize line range heading_lines[key] = [line_number, null]; - - // Update current content block current_content_block = { key, start_line: line_number }; } } - // Continue; content block remains open + // Continue reading lines until something else (heading, list item, code block) closes this content block. continue; } + // After processing all lines, close any open headings, list items, or content blocks with the last line as their end. const total_lines = lines.length; - - // After all lines, close any open headings while (heading_stack.length > 0) { const finished_heading = heading_stack.pop(); if (heading_lines[finished_heading.key][1] === null) { @@ -284,7 +318,6 @@ export function parse_blocks(markdown) { } } - // Close any open list item if (current_list_item) { if (heading_lines[current_list_item.key][1] === null) { heading_lines[current_list_item.key][1] = total_lines; @@ -292,7 +325,6 @@ export function parse_blocks(markdown) { current_list_item = null; } - // Close any open content block if (current_content_block) { if (heading_lines[current_content_block.key][1] === null) { heading_lines[current_content_block.key][1] = total_lines; @@ -300,15 +332,15 @@ export function parse_blocks(markdown) { current_content_block = null; } - // Close root heading if open + // If the root heading was opened but never closed, close it at the final line. if (heading_lines[root_heading_key] && heading_lines[root_heading_key][1] === null) { heading_lines[root_heading_key][1] = total_lines; } - // Assign to result + // Build the final result object from heading_lines. for (const key in heading_lines) { result[key] = heading_lines[key]; } return result; -} +} \ No newline at end of file