diff --git a/packages/blocks/README.md b/packages/blocks/README.md index c1f424fffd85a8..9f94639641b52a 100644 --- a/packages/blocks/README.md +++ b/packages/blocks/README.md @@ -594,7 +594,21 @@ _Returns_ # **parse** -Parses the post content with a PegJS grammar and returns a list of blocks. +Utilizes an optimized token-driven parser based on the Gutenberg grammar spec +defined through a parsing expression grammar to take advantage of the regular +cadence provided by block delimiters -- composed syntactically through HTML +comments -- which, given a general HTML document as an input, returns a block +list array representation. + +This is a recursive-descent parser that scans linearly once through the input +document. Instead of directly recursing it utilizes a trampoline mechanism to +prevent stack overflow. This initial pass is mainly interested in separating +and isolating the blocks serialized in the document and manifestly not in the +content within the blocks. + +_Related_ + +- _Parameters_ diff --git a/packages/blocks/src/api/index.js b/packages/blocks/src/api/index.js index 1c3a1bfb60dffb..6952ebe9223db7 100644 --- a/packages/blocks/src/api/index.js +++ b/packages/blocks/src/api/index.js @@ -1,3 +1,9 @@ +// The blocktype is the most important concept within the block API. It defines +// all aspects of the block configuration and its interfaces, including `edit` +// and `save`. The transforms specification allows converting one blocktype to +// another through formulas defined by either the source or the destination. +// Switching a blocktype is to be considered a one-way operation implying a +// transformation in the opposite way has to be handled explicitly. export { createBlock, createBlocksFromInnerBlocksTemplate, @@ -8,16 +14,45 @@ export { findTransform, getBlockFromExample, } from './factory'; + +// The block tree is composed of a collection of block nodes. Blocks contained +// within other blocks are called inner blocks. 
An important design +// consideration is that inner blocks are -- conceptually -- not part of the +// territory established by the parent block that contains them. +// +// This has multiple practical implications: when parsing, we can safely dispose +// of any block boundary found within a block from the innerHTML property when +// transferring to state. Not doing so would have a compounding effect on memory +// and uncertainty over the source of truth. This can be illustrated in how, +// given a tree of `n` nested blocks, the entry node would have to contain the +// actual content of each block while each subsequent block node in the state +// tree would replicate the entire chain `n-1`, meaning the extreme end node +// would have been replicated `n` times as the tree is traversed and would +// generate uncertainty as to which one is to hold the current value of the +// block. For composition, it also means inner blocks can effectively be child +// components whose mechanisms can be shielded from the `edit` implementation +// and just passed along. export { default as parse, getBlockAttributes, parseWithAttributeSchema, } from './parser'; + +// While block transformations account for a specific surface of the API, there +// are also raw transformations which handle arbitrary sources not made out of +// blocks but producing blocks based on various heuristics. This includes +// pasting rich text or HTML data. export { pasteHandler, rawHandler, deprecatedGetPhrasingContentSchema as getPhrasingContentSchema, } from './raw-handling'; + +// The process of serialization aims to deflate the internal memory of the block +// editor and its state representation back into a valid HTML string. This +// process restores the document integrity and inserts invisible delimiters +// around each block with HTML comment boundaries which can contain any extra +// attributes needed to operate with the block later on. 
export { default as serialize, getBlockContent, @@ -27,8 +62,48 @@ export { getSaveContent, getBlockProps as __unstableGetBlockProps, } from './serializer'; + +// Validation is the process of comparing a block source with its output before +// there is any user input or interaction with a block. When this operation +// fails -- for whatever reason -- the block is to be considered invalid. As +// part of validating a block the system will attempt to run the source against +// any provided deprecation definitions. +// +// Worth emphasizing that validation is not a case of whether the markup is +// merely HTML spec-compliant but about how the editor knows to create such +// markup and that its inability to create an identical result can be a strong +// indicator of potential data loss (the invalidation is then a protective +// measure). +// +// The invalidation process can also be deconstructed in phases: 1) validate the +// block exists; 2) validate the source matches the output; 3) validate the +// source matches deprecated outputs; 4) work through the significance of +// differences. These are stacked in a way that favors performance and optimizes +// for the majority of cases. That is to say, the evaluation logic can become +// more sophisticated the further down it goes in the process as the cost is +// accounted for. The first logic checks have to be extremely efficient since +// they will be run for all valid and invalid blocks alike. However, once a +// block is detected as invalid -- failing the three first steps -- it is +// adequate to spend more time determining validity before throwing a conflict. export { isValidBlockContent } from './validation'; export { getCategories, setCategories, updateCategory } from './categories'; + +// Blocks are inherently indifferent about where the data they operate with ends +// up being saved. For example, all blocks can have a static and dynamic aspect +// to them depending on the needs. 
The static nature of a block is the `save()` +// definition that is meant to be serialized into HTML and which can be left +// void. Any block can also register a `render_callback` on the server, which +// makes its output dynamic either in part or in its totality. +// +// Child blocks are defined as a relationship that builds on top of the inner +// blocks mechanism. A child block is a block node of a particular type that can +// only exist within the inner block boundaries of a specific parent type. This +// allows block authors to compose specific blocks that are not meant to be used +// outside of a specified parent block context. Thus, child blocks extend the +// concept of inner blocks to support a more direct relationship between sets of +// blocks. The addition of parent–child would be a subset of the inner block +// functionality under the premise that certain blocks only make sense as +// children of another block. export { registerBlockType, registerBlockCollection, @@ -63,6 +138,13 @@ export { getBlockLabel as __experimentalGetBlockLabel, getAccessibleBlockLabel as __experimentalGetAccessibleBlockLabel, } from './utils'; + +// Templates are, in a general sense, a basic collection of block nodes with any +// given set of predefined attributes that are supplied as the initial state of +// an inner blocks group. These nodes can, in turn, contain any number of nested +// blocks within their definition. Templates allow both to specify a default +// state for an editor session or a default set of blocks for any inner block +// implementation within a specific block. 
export { doBlocksMatchTemplate, synchronizeBlocksWithTemplate, diff --git a/packages/blocks/src/api/parser.js b/packages/blocks/src/api/parser.js index b87eb4f7abf52b..829557f89f2011 100644 --- a/packages/blocks/src/api/parser.js +++ b/packages/blocks/src/api/parser.js @@ -242,7 +242,8 @@ export function getBlockAttribute( let value; switch ( attributeSchema.source ) { - // undefined source means that it's an attribute serialized to the block's "comment" + // An undefined source means that it's an attribute serialized to the + // block's "comment". case undefined: value = commentAttributes ? commentAttributes[ attributeKey ] @@ -324,15 +325,22 @@ export function getMigratedBlock( block, parsedAttributes ) { const blockType = getBlockType( block.name ); const { deprecated: deprecatedDefinitions } = blockType; + // Bail early if there are no registered deprecations to be handled. if ( ! deprecatedDefinitions || ! deprecatedDefinitions.length ) { return block; } const { originalContent, innerBlocks } = block; + // By design, blocks lack any sort of version tracking. Instead, to process + // outdated content the system operates a queue out of all the defined + // attribute shapes and tries each definition until the input produces a + // valid result. This mechanism seeks to avoid polluting the user-space with + // machine-specific code. An invalid block is thus a block that could not be + // matched successfully with any of the registered deprecation definitions. for ( let i = 0; i < deprecatedDefinitions.length; i++ ) { // A block can opt into a migration even if the block is valid by - // defining isEligible on its deprecation. If the block is both valid + // defining `isEligible` on its deprecation. If the block is both valid // and does not opt to migrate, skip. const { isEligible = stubFalse } = deprecatedDefinitions[ i ]; if ( block.isValid && ! 
isEligible( parsedAttributes, innerBlocks ) ) { @@ -360,6 +368,8 @@ export function getMigratedBlock( block, parsedAttributes ) { originalContent ); + // An invalid block does not imply incorrect HTML but rather that block + // source information could be lost on reserialization. if ( ! isValid ) { block = { ...block, @@ -456,8 +466,15 @@ export function convertLegacyBlocks( name, attributes ) { */ export function createBlockWithFallback( blockNode ) { const { blockName: originalName } = blockNode; + + // The fundamental structure of a blocktype includes its attributes, inner + // blocks, and inner HTML. It is important to distinguish inner blocks from + // the HTML content of the block as only the latter is relevant for block + // validation and edit operations. let { attrs: attributes, innerBlocks = [], innerHTML } = blockNode; const { innerContent } = blockNode; + + // Blocks that don't have a registered handler are considered freeform. const freeformContentFallbackBlock = getFreeformContentHandlerName(); const unregisteredFallbackBlock = getUnregisteredTypeHandlerName() || freeformContentFallbackBlock; @@ -473,7 +490,7 @@ export function createBlockWithFallback( blockNode ) { ( { name, attributes } = convertLegacyBlocks( name, attributes ) ); - // Fallback content may be upgraded from classic editor expecting implicit + // Fallback content may be upgraded from classic content expecting implicit // automatic paragraphs, so preserve them. Assumes wpautop is idempotent, // meaning there are no negative consequences to repeated autop calls. if ( name === freeformContentFallbackBlock ) { @@ -496,7 +513,7 @@ export function createBlockWithFallback( blockNode ) { // Preserve undelimited content for use by the unregistered type // handler. A block node's `innerHTML` isn't enough, as that field only - // carries the block's own HTML and not its nested blocks'. + // carries the block's own HTML and not its nested blocks. 
const originalUndelimitedContent = serializeBlockNode( reconstitutedBlockNode, { @@ -567,6 +584,7 @@ export function createBlockWithFallback( blockNode ) { // as invalid, or future serialization attempt results in an error. block.originalContent = block.originalContent || innerHTML; + // Ensure all necessary migrations are applied to the block. block = getMigratedBlock( block, attributes ); if ( block.validationIssues && block.validationIssues.length > 0 ) { @@ -622,7 +640,7 @@ export function serializeBlockNode( blockNode, options = {} ) { let childIndex = 0; const content = innerContent .map( ( item ) => - // `null` denotes a nested block, otherwise we have an HTML fragment + // `null` denotes a nested block, otherwise we have an HTML fragment. item !== null ? item : serializeBlockNode( innerBlocks[ childIndex++ ], options ) @@ -653,7 +671,20 @@ const createParse = ( parseImplementation ) => ( content ) => }, [] ); /** - * Parses the post content with a PegJS grammar and returns a list of blocks. + * Utilizes an optimized token-driven parser based on the Gutenberg grammar spec + * defined through a parsing expression grammar to take advantage of the regular + * cadence provided by block delimiters -- composed syntactically through HTML + * comments -- which, given a general HTML document as an input, returns a block + * list array representation. + * + * This is a recursive-descent parser that scans linearly once through the input + * document. Instead of directly recursing it utilizes a trampoline mechanism to + * prevent stack overflow. This initial pass is mainly interested in separating + * and isolating the blocks serialized in the document and manifestly not in the + * content within the blocks. + * + * @see + * https://developer.wordpress.org/block-editor/packages/packages-block-serialization-default-parser/ * * @param {string} content The post content. *