diff --git a/packages/blocks/README.md b/packages/blocks/README.md
index c1f424fffd85a8..9f94639641b52a 100644
--- a/packages/blocks/README.md
+++ b/packages/blocks/README.md
@@ -594,7 +594,21 @@ _Returns_
# **parse**
-Parses the post content with a PegJS grammar and returns a list of blocks.
+Utilizes an optimized token-driven parser based on the Gutenberg grammar spec
+defined through a parsing expression grammar to take advantage of the regular
+cadence provided by block delimiters -- composed syntactically through HTML
+comments -- which, given a general HTML document as an input, returns a block
+list array representation.
+
+This is a recursive-descent parser that scans linearly once through the input
+document. Instead of directly recursing it utilizes a trampoline mechanism to
+prevent stack overflow. This initial pass is mainly interested in separating
+and isolating the blocks serialized in the document and manifestly not in the
+content within the blocks.
+
+_Related_
+
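+-   <https://developer.wordpress.org/block-editor/packages/packages-block-serialization-default-parser/>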
_Parameters_
diff --git a/packages/blocks/src/api/index.js b/packages/blocks/src/api/index.js
index 1c3a1bfb60dffb..6952ebe9223db7 100644
--- a/packages/blocks/src/api/index.js
+++ b/packages/blocks/src/api/index.js
@@ -1,3 +1,9 @@
+// The blocktype is the most important concept within the block API. It defines
+// all aspects of the block configuration and its interfaces, including `edit`
+// and `save`. The transforms specification allows converting one blocktype to
+// another through formulas defined by either the source or the destination.
+// Switching a blocktype is to be considered a one-way operation implying a
+// transformation in the opposite way has to be handled explicitly.
export {
createBlock,
createBlocksFromInnerBlocksTemplate,
@@ -8,16 +14,45 @@ export {
findTransform,
getBlockFromExample,
} from './factory';
+
+// The block tree is composed of a collection of block nodes. Blocks contained
+// within other blocks are called inner blocks. An important design
+// consideration is that inner blocks are -- conceptually -- not part of the
+// territory established by the parent block that contains them.
+//
+// This has multiple practical implications: when parsing, we can safely dispose
+// of any block boundary found within a block from the innerHTML property when
+// transferring to state. Not doing so would have a compounding effect on memory
+// and uncertainty over the source of truth. This can be illustrated in how,
+// given a tree of `n` nested blocks, the entry node would have to contain the
+// actual content of each block while each subsequent block node in the state
+// tree would replicate the entire chain `n-1`, meaning the extreme end node
+// would have been replicated `n` times as the tree is traversed and would
+// generate uncertainty as to which one is to hold the current value of the
+// block. For composition, it also means inner blocks can effectively be child
+// components whose mechanisms can be shielded from the `edit` implementation
+// and just passed along.
export {
default as parse,
getBlockAttributes,
parseWithAttributeSchema,
} from './parser';
+
+// While block transformations account for a specific surface of the API, there
+// are also raw transformations which handle arbitrary sources not made out of
+// blocks but producing blocks based on various heuristics. This includes
+// pasting rich text or HTML data.
export {
pasteHandler,
rawHandler,
deprecatedGetPhrasingContentSchema as getPhrasingContentSchema,
} from './raw-handling';
+
+// The process of serialization aims to deflate the internal memory of the block
+// editor and its state representation back into a valid HTML string. This
+// process restores the document integrity and inserts invisible delimiters
+// around each block with HTML comment boundaries which can contain any extra
+// attributes needed to operate with the block later on.
export {
default as serialize,
getBlockContent,
@@ -27,8 +62,48 @@ export {
getSaveContent,
getBlockProps as __unstableGetBlockProps,
} from './serializer';
+
+// Validation is the process of comparing a block source with its output before
+// there is any user input or interaction with a block. When this operation
+// fails -- for whatever reason -- the block is to be considered invalid. As
+// part of validating a block the system will attempt to run the source against
+// any provided deprecation definitions.
+//
+// Worth emphasizing that validation is not a case of whether the markup is
+// merely HTML spec-compliant but about how the editor knows to create such
+// markup and that its inability to create an identical result can be a strong
+// indicator of potential data loss (the invalidation is then a protective
+// measure).
+//
+// The invalidation process can also be deconstructed in phases: 1) validate the
+// block exists; 2) validate the source matches the output; 3) validate the
+// source matches deprecated outputs; 4) work through the significance of
+// differences. These are stacked in a way that favors performance and optimizes
+// for the majority of cases. That is to say, the evaluation logic can become
+// more sophisticated the further down it goes in the process as the cost is
+// accounted for. The first logic checks have to be extremely efficient since
+// they will be run for all valid and invalid blocks alike. However, once a
+// block is detected as invalid -- failing the first three steps -- it is
+// adequate to spend more time determining validity before throwing a conflict.
export { isValidBlockContent } from './validation';
export { getCategories, setCategories, updateCategory } from './categories';
+
+// Blocks are inherently indifferent about where the data they operate with ends
+// up being saved. For example, all blocks can have a static and dynamic aspect
+// to them depending on the needs. The static nature of a block is the `save()`
+// definition that is meant to be serialized into HTML and which can be left
+// void. Any block can also register a `render_callback` on the server, which
+// makes its output dynamic either in part or in its totality.
+//
+// Child blocks are defined as a relationship that builds on top of the inner
+// blocks mechanism. A child block is a block node of a particular type that can
+// only exist within the inner block boundaries of a specific parent type. This
+// allows block authors to compose specific blocks that are not meant to be used
+// outside of a specified parent block context. Thus, child blocks extend the
+// concept of inner blocks to support a more direct relationship between sets of
+// blocks. The addition of parent–child would be a subset of the inner block
+// functionality under the premise that certain blocks only make sense as
+// children of another block.
export {
registerBlockType,
registerBlockCollection,
@@ -63,6 +138,13 @@ export {
getBlockLabel as __experimentalGetBlockLabel,
getAccessibleBlockLabel as __experimentalGetAccessibleBlockLabel,
} from './utils';
+
+// Templates are, in a general sense, a basic collection of block nodes with any
+// given set of predefined attributes that are supplied as the initial state of
+// an inner blocks group. These nodes can, in turn, contain any number of nested
+// blocks within their definition. Templates allow specifying either a default
+// state for an editor session or a default set of blocks for any inner block
+// implementation within a specific block.
export {
doBlocksMatchTemplate,
synchronizeBlocksWithTemplate,
diff --git a/packages/blocks/src/api/parser.js b/packages/blocks/src/api/parser.js
index b87eb4f7abf52b..829557f89f2011 100644
--- a/packages/blocks/src/api/parser.js
+++ b/packages/blocks/src/api/parser.js
@@ -242,7 +242,8 @@ export function getBlockAttribute(
let value;
switch ( attributeSchema.source ) {
- // undefined source means that it's an attribute serialized to the block's "comment"
+ // An undefined source means that it's an attribute serialized to the
+ // block's "comment".
case undefined:
value = commentAttributes
? commentAttributes[ attributeKey ]
@@ -324,15 +325,22 @@ export function getMigratedBlock( block, parsedAttributes ) {
const blockType = getBlockType( block.name );
const { deprecated: deprecatedDefinitions } = blockType;
+ // Bail early if there are no registered deprecations to be handled.
if ( ! deprecatedDefinitions || ! deprecatedDefinitions.length ) {
return block;
}
const { originalContent, innerBlocks } = block;
+ // By design, blocks lack any sort of version tracking. Instead, to process
+ // outdated content the system operates a queue out of all the defined
+ // attribute shapes and tries each definition until the input produces a
+ // valid result. This mechanism seeks to avoid polluting the user-space with
+ // machine-specific code. An invalid block is thus a block that could not be
+ // matched successfully with any of the registered deprecation definitions.
for ( let i = 0; i < deprecatedDefinitions.length; i++ ) {
// A block can opt into a migration even if the block is valid by
- // defining isEligible on its deprecation. If the block is both valid
+ // defining `isEligible` on its deprecation. If the block is both valid
// and does not opt to migrate, skip.
const { isEligible = stubFalse } = deprecatedDefinitions[ i ];
if ( block.isValid && ! isEligible( parsedAttributes, innerBlocks ) ) {
@@ -360,6 +368,8 @@ export function getMigratedBlock( block, parsedAttributes ) {
originalContent
);
+ // An invalid block does not imply incorrect HTML but rather that block
+ // source information could be lost on reserialization.
if ( ! isValid ) {
block = {
...block,
@@ -456,8 +466,15 @@ export function convertLegacyBlocks( name, attributes ) {
*/
export function createBlockWithFallback( blockNode ) {
const { blockName: originalName } = blockNode;
+
+ // The fundamental structure of a blocktype includes its attributes, inner
+ // blocks, and inner HTML. It is important to distinguish inner blocks from
+ // the HTML content of the block as only the latter is relevant for block
+ // validation and edit operations.
let { attrs: attributes, innerBlocks = [], innerHTML } = blockNode;
const { innerContent } = blockNode;
+
+ // Blocks that don't have a registered handler are considered freeform.
const freeformContentFallbackBlock = getFreeformContentHandlerName();
const unregisteredFallbackBlock =
getUnregisteredTypeHandlerName() || freeformContentFallbackBlock;
@@ -473,7 +490,7 @@ export function createBlockWithFallback( blockNode ) {
( { name, attributes } = convertLegacyBlocks( name, attributes ) );
- // Fallback content may be upgraded from classic editor expecting implicit
+ // Fallback content may be upgraded from classic content expecting implicit
// automatic paragraphs, so preserve them. Assumes wpautop is idempotent,
// meaning there are no negative consequences to repeated autop calls.
if ( name === freeformContentFallbackBlock ) {
@@ -496,7 +513,7 @@ export function createBlockWithFallback( blockNode ) {
// Preserve undelimited content for use by the unregistered type
// handler. A block node's `innerHTML` isn't enough, as that field only
- // carries the block's own HTML and not its nested blocks'.
+ // carries the block's own HTML and not that of its nested blocks.
const originalUndelimitedContent = serializeBlockNode(
reconstitutedBlockNode,
{
@@ -567,6 +584,7 @@ export function createBlockWithFallback( blockNode ) {
// as invalid, or future serialization attempt results in an error.
block.originalContent = block.originalContent || innerHTML;
+ // Ensure all necessary migrations are applied to the block.
block = getMigratedBlock( block, attributes );
if ( block.validationIssues && block.validationIssues.length > 0 ) {
@@ -622,7 +640,7 @@ export function serializeBlockNode( blockNode, options = {} ) {
let childIndex = 0;
const content = innerContent
.map( ( item ) =>
- // `null` denotes a nested block, otherwise we have an HTML fragment
+ // `null` denotes a nested block, otherwise we have an HTML fragment.
item !== null
? item
: serializeBlockNode( innerBlocks[ childIndex++ ], options )
@@ -653,7 +671,20 @@ const createParse = ( parseImplementation ) => ( content ) =>
}, [] );
/**
- * Parses the post content with a PegJS grammar and returns a list of blocks.
+ * Utilizes an optimized token-driven parser based on the Gutenberg grammar spec
+ * defined through a parsing expression grammar to take advantage of the regular
+ * cadence provided by block delimiters -- composed syntactically through HTML
+ * comments -- which, given a general HTML document as an input, returns a block
+ * list array representation.
+ *
+ * This is a recursive-descent parser that scans linearly once through the input
+ * document. Instead of directly recursing it utilizes a trampoline mechanism to
+ * prevent stack overflow. This initial pass is mainly interested in separating
+ * and isolating the blocks serialized in the document and manifestly not in the
+ * content within the blocks.
+ *
+ * @see
+ * https://developer.wordpress.org/block-editor/packages/packages-block-serialization-default-parser/
*
* @param {string} content The post content.
*