From a8223736920043f8badcd6a29c509bdcd49a5d6c Mon Sep 17 00:00:00 2001 From: Darren Ethier Date: Tue, 5 Nov 2019 13:29:55 -0500 Subject: [PATCH] add new algorithm for parsing i18n string --- .../element/src/create-interpolate-element.js | 419 +++++++++++------- .../src/test/create-interpolate-element.js | 123 +++-- 2 files changed, 314 insertions(+), 228 deletions(-) diff --git a/packages/element/src/create-interpolate-element.js b/packages/element/src/create-interpolate-element.js index 5b8afe7b06116e..adc6da40cf7fae 100644 --- a/packages/element/src/create-interpolate-element.js +++ b/packages/element/src/create-interpolate-element.js @@ -1,175 +1,97 @@ -/** - * Internal dependencies - */ -import { createElement, Fragment, isValidElement } from './react'; - /** * External dependencies */ -import { escapeRegExp, flatMap } from 'lodash'; - -const getHasPropValue = ( config ) => !! config.value; -const getHasChildren = ( config ) => !! config.hasChildren; - -const getBalancedTagsExpression = ( searchString ) => new RegExp( - escapeRegExp( `<${ searchString }>` ) + - '(.*)' + escapeRegExp( `` ) -); +import { createElement, isValidElement, Fragment } from 'react'; -const getSelfClosingTagExpression = ( searchString ) => new RegExp( - escapeRegExp( `<${ searchString }/>` ) -); +let indoc, + offset, + output, + stack, + keyIndex; /** - * Generates and returns the regular expression used for splitting a string + * Matches tags in the localized string * - * @param {string} searchString The search string serving as the base for the - * expression. + * This is used for extracting the tag pattern groups for parsing the localized + * string and along with the map converting it to a react element. * - * @return {RegExp} The generated regular expression + * There are four references extracted using this tokenizer: + * + * match: Full match of the tag (i.e. , ,
) + * isClosing: If the match has a closing tag (i.e. ) + * + * @type RegExp */ -const getSplitRegEx = ( searchString ) => new RegExp( '(' + escapeRegExp( searchString ) + ')' ); +const tokenizer = /<(\/)?(\w+)\s*(\/)?>/g; /** - * Generates a String.prototype.match value for the incoming arguments. - * - * @param {string} interpolatedString The string the match is performed on. - * @param {string} searchString The string used as the base for the - * search - * @param {Object} conversionConfig The configuration object for the - * interpolation being performed. + * This receives the value for a map element which consists of an element and + * it's props and returns a element creator function. * - * @return {Array|null} An array if there is a match or null if not. + * @param {Array} [ element, props ] The first item in the array is expected to + * be a string or a react component. The + * second item is expected to be either + * undefined or an object. */ -const getMatchFromString = ( - interpolatedString, - searchString, - conversionConfig -) => { - // first try children reg ex. If there is a match, then return. - const match = interpolatedString.match( - getBalancedTagsExpression( searchString ) - ); - - if ( match !== null ) { - conversionConfig.hasChildren = true; - return match; +const elementCreator = ( [ element, props = {} ] ) => ( children ) => { + if ( ! typeof element === 'string' && ! isValidElement( element ) ) { + throw new Error( 'not a valid element(' + element + ')' ); } - - // no children, so just return selfClosingTag match - return interpolatedString.match( getSelfClosingTagExpression( searchString ) ); + props.key = ++keyIndex; + return createElement( element, props, children ); }; -// index for keys -// This is external to `recursiveCreateElement` and reset in -// `createInterpolateElement` because of the recursion. -let keyIndex = -1; - /** - * Used to recursively create elements from the interpolation string using the - * conversion map. + * An object describing a component to be created. * - * @param {string} potentialElement The interpolation string (or fragment) - * being processed. - * @param {Array[]} conversionMap The interpolation map used for converting - * the string to a react element. + * This is used by the string iterator to track children that get added to an + * element when it is created. This allows for collecting nested elements in + * the string before creating the parent. * - * @return {Element|string|Array} A react element, string or array. + * @param {Function} creator An element creator that will be invoked for + * actually creating the react element with the + * provided children. + * @param {Array} children Array of children to be provided to the element + * creator when invoked. + * + * @return {Object} An object returning the creator and children. */ -const recursiveCreateElement = ( potentialElement, conversionMap ) => { - /** - * If the conversion map is not a valid array or empty then just return the - * element. - */ - if ( ! Array.isArray( conversionMap ) || ! conversionMap.length ) { - return potentialElement; - } - const [ mapItem ] = conversionMap; - const [ searchString, conversionConfig ] = mapItem; - - /** - * This short circuits the process if the conversion map has an invalid config. - */ - if ( ! searchString || ! conversionConfig ) { - return potentialElement; - } - - const match = getMatchFromString( - potentialElement, - searchString, - conversionConfig - ); - - // if there is no match for this string, then that means it is not an element - // so just return as is to be used as a direct child. - if ( match === null ) { - return potentialElement; - } - - // if the full match returned equals the potential element, then we know - // we can create the element and restart the conversion on any children if - // necessary. - if ( match[ 0 ] === potentialElement ) { - // remove this item from the conversion map because it's no longer needed. - conversionMap.shift(); - - if ( getHasPropValue( conversionConfig ) ) { - // if value is a react element, then need to wrap in Fragment with a key - // to prevent key warnings. - if ( isValidElement( conversionConfig.value ) ) { - keyIndex++; - return { conversionConfig.value }; - } - return conversionConfig.value; - } - keyIndex++; - return getHasChildren( conversionConfig ) ? - createElement( - conversionConfig.tag, - { ...conversionConfig.props, key: keyIndex }, - recursiveCreateElement( match[ 1 ], conversionMap ) - ) : - createElement( - conversionConfig.tag, - { ...conversionConfig.props, key: keyIndex } - ); - } - - // still here, so we need to split on the full match and loop through each - return flatMap( - potentialElement.split( getSplitRegEx( match[ 0 ] ) ) - .filter( ( value ) => !! value ), - ( element ) => { - return recursiveCreateElement( element, conversionMap ); - } - ); -}; +function Component( creator, children = [] ) { + return { + creator, + children, + }; +} /** - * This reorders the conversionMap so that it's entries match the order of - * elements in the string. + * This encapsulates information about the current iteration state for adding to + * the stack * - * This is necessary because the parser is order sensitive due to the potential - * for nested elements (eg. Some linked and emphasized string). - * This ensures that the parsing will be done correctly yet still allow for the - * consumer not to worry about order in the map. + * @param {Component} component + * @param {number} tokenStart + * @param {number} tokenLength + * @param {number} prevOffset + * @param {number} leadingTextStart * - * @param interpolatedString {string} The string being parsed. - * @param conversionMap {Array} The map being reordered - * - * @return {Array} The new map in the correct order for the tags in the string. + * @return {Object} Iteration info as an object. */ -const reorderMapByElementOrder = ( interpolatedString, conversionMap ) => { - // if length of map is only one then we can just return as is. - if ( conversionMap.length === 1 ) { - return conversionMap; - } - return conversionMap.sort( ( [ tagA ], [ tagB ] ) => { - tagA = `<${ tagA }`; - tagB = `<${ tagB }`; - return interpolatedString.indexOf( tagA ) > interpolatedString.indexOf( tagB ); - } ); -}; +function Frame( + component, + tokenStart, + tokenLength, + prevOffset, + leadingTextStart +) { + return { + component, + tokenStart, + tokenLength, + prevOffset: prevOffset || tokenStart + tokenLength, + leadingTextStart, + }; +} /** * This function creates an interpolated element from a passed in string with @@ -179,17 +101,16 @@ const reorderMapByElementOrder = ( interpolatedString, conversionMap ) => { * @example * For example, for the given string: * - * "This is a string with a link, a self-closing - * tag and a plain value " + * "This is a string with a link and a self-closing + * tag" * * You would have something like this as the conversionMap value: * * ```js * { - * span: { tag: CustomComponent, props: {} }, - * a: { tag: 'a', props: { href: 'https://github.com' } }, - * CustomComponentB: { tag: CustomComponentB, props: {} }, - * 'custom value': { value: 'custom value' }, + * span: ['span'], + * a: ['a', { href: 'https://github.com' }], + * CustomComponentB: [ CustomComponentB ], * } * ``` * @@ -200,26 +121,27 @@ const reorderMapByElementOrder = ( interpolatedString, conversionMap ) => { * @return {Element} A react element. */ const createInterpolateElement = ( interpolatedString, conversionMap ) => { + indoc = interpolatedString; + offset = 0; keyIndex = -1; + output = []; + stack = []; + tokenizer.lastIndex = 0; if ( ! isValidConversionMap( conversionMap ) ) { return interpolatedString; } - // verify that the object isn't empty. - conversionMap = Object.entries( conversionMap ); - if ( conversionMap.length === 0 ) { - return interpolatedString; - } + do { + // twiddle our thumbs + } while ( proceed( conversionMap ) ); - return createElement( - Fragment, - {}, - recursiveCreateElement( - interpolatedString, - reorderMapByElementOrder( interpolatedString, conversionMap ) - ), - ); + output = output.every( ( a ) => 'string' === typeof a ) ? + output.join( '' ) : + output.filter( ( a ) => '' !== a ); + return typeof output === 'string' ? + output : + createElement( Fragment, {}, output ); }; /** @@ -236,4 +158,169 @@ const isValidConversionMap = ( conversionMap ) => { typeof conversionMap.length === 'undefined'; }; +/** + * This is the iterator over the matches in the string. + * + * @param {Object} conversionMap The conversion map for the string. + * + * @return {boolean} true for continuing to iterate, false for finished. + */ +function proceed( conversionMap ) { + const next = nextToken(); + const [ tokenType, name, startOffset, tokenLength ] = next; + const stackDepth = stack.length; + const leadingTextStart = startOffset > offset ? offset : null; + if ( ! conversionMap[ name ] ) { + if ( stackDepth !== 0 ) { + const { stackLeadingText, tokenStart } = stack.pop(); + output.push( indoc.substr( stackLeadingText, tokenStart ) ); + } + addText(); + return false; + } + switch ( tokenType ) { + case 'no-more-tokens': + if ( stackDepth !== 0 ) { + const { stackLeadingText, tokenStart } = stack.pop(); + output.push( indoc.substr( stackLeadingText, tokenStart ) ); + } + addText(); + return false; + + case 'self-closed': + if ( 0 === stackDepth ) { + if ( null !== leadingTextStart ) { + output.push( + indoc.substr( leadingTextStart, startOffset - leadingTextStart ) + ); + } + output.push( elementCreator( conversionMap[ name ] )() ); + offset = startOffset + tokenLength; + return true; + } + + // otherwise we found an inner element + addChild( + new Component( elementCreator( conversionMap[ name ] ), [] ), + startOffset, + tokenLength + ); + offset = startOffset + tokenLength; + return true; + + case 'opener': + stack.push( + Frame( + new Component( elementCreator( conversionMap[ name ] ), [] ), + startOffset, + tokenLength, + startOffset + tokenLength, + leadingTextStart + ) + ); + offset = startOffset + tokenLength; + return true; + + case 'closer': + // if we're not nesting then this is easy - close the block + if ( 1 === stackDepth ) { + addComponentFromStack( startOffset ); + offset = startOffset + tokenLength; + return true; + } + + // otherwise we're nested and we have to close out the current + // block and add it as a innerBlock to the parent + const stackTop = stack.pop(); + const text = indoc.substr( + stackTop.prevOffset, + startOffset - stackTop.prevOffset + ); + stackTop.component.children.push( text ); + stackTop.prevOffset = startOffset + tokenLength; + + addChild( + stackTop.component, + stackTop.tokenStart, + stackTop.tokenLength, + startOffset + tokenLength + ); + offset = startOffset + tokenLength; + return true; + + default: + addText(); + return false; + } +} + +/** + * Grabs the next token match in the string and returns it's details. + * + * @return {Array} An array of details for the token matched. + */ +function nextToken() { + const matches = tokenizer.exec( indoc ); + // we have no more tokens + if ( null === matches ) { + return [ 'no-more-tokens' ]; + } + const startedAt = matches.index; + const [ match, isClosing, name, isSelfClosed ] = matches; + const length = match.length; + if ( isSelfClosed ) { + return [ 'self-closed', name, startedAt, length ]; + } + if ( isClosing ) { + return [ 'closer', name, startedAt, length ]; + } + return [ 'opener', name, startedAt, length ]; +} + +/** + * Pushes text extracted from the indoc string to the output stack given the + * current rawLength value and offset (if rawLength is provided ) or the + * indoc.length and offset. + * + * @param {number} rawLength If provided will be used as the length of chars + * to extract. + */ +function addText( rawLength ) { + const length = rawLength ? rawLength : indoc.length - offset; + if ( 0 === length ) { + return output.push( '' ); + } + output.push( indoc.substr( offset, length ) ); +} + +function addChild( component, tokenStart, tokenLength, lastOffset ) { + const parent = stack[ stack.length - 1 ]; + const text = indoc.substr( parent.prevOffset, tokenStart - parent.prevOffset ); + + if ( text ) { + parent.component.children.push( text ); + } + + parent.component.children.push( component.creator( component.children ) ); + parent.prevOffset = lastOffset ? lastOffset : tokenStart + tokenLength; +} + +function addComponentFromStack( endOffset ) { + const { component, leadingTextStart, prevOffset, tokenStart } = stack.pop(); + + const text = endOffset ? + indoc.substr( prevOffset, endOffset - prevOffset ) : + indoc.substr( prevOffset ); + + if ( text ) { + component.children.push( text ); + } + + if ( null !== leadingTextStart ) { + output.push( indoc.substr( leadingTextStart, tokenStart - leadingTextStart ) ); + } + + output.push( component.creator( component.children ) ); +} + export default createInterpolateElement; diff --git a/packages/element/src/test/create-interpolate-element.js b/packages/element/src/test/create-interpolate-element.js index 82089f387fb216..38945d186e7bab 100644 --- a/packages/element/src/test/create-interpolate-element.js +++ b/packages/element/src/test/create-interpolate-element.js @@ -9,16 +9,16 @@ describe( 'createInterpolateElement', () => { const testString = 'This is a string'; expect( createInterpolateElement( testString, [] ) - ).toEqual( 'This is a string' ); + ).toBe( 'This is a string' ); } ); it( 'returns same string when there are no tokens in the string', () => { const testString = 'This is a string'; expect( createInterpolateElement( testString, - { someValue: { value: 10 } } + { someValue: [ 'someValue' ] } ) - ).toEqual( { testString } ); + ).toBe( testString ); } ); it( 'returns same string when there is an invalid conversion map', () => { const testString = 'This is a string'; @@ -27,17 +27,35 @@ describe( 'createInterpolateElement', () => { testString, [ 'someValue', { value: 10 } ], ) - ).toEqual( testString ); + ).toBe( testString ); } ); it( 'returns same string when there is an non matching token in the ' + 'string', () => { - const testString = 'This is a string'; + const testString = 'This is a string'; expect( createInterpolateElement( testString, - { someValue: { value: 20 } } + { someValue: [ 'someValue' ] } ) - ).toEqual( { testString } ); + ).toBe( testString ); + } ); + it( 'returns same string when there is spaces in the token', () => { + const testString = 'This is a string'; + expect( + createInterpolateElement( + testString, + { 'spaced token': [ 'a' ] } + ) + ).toBe( testString ); + } ); + it( 'returns string without unbalanced tags', () => { + const testString = 'This is a string'; + expect( + createInterpolateElement( + testString, + { em: [ 'em' ] } + ) + ).toBe( 'This is a string' ); } ); it( 'returns expected react element for non nested components', () => { const testString = 'This is a string with a link.'; @@ -49,20 +67,22 @@ describe( 'createInterpolateElement', () => { createElement( 'a', { href: 'https://github.com', className: 'some_class', key: 0 }, - 'a link' + [ 'a link' ] ), '.', ] ); - expect( createInterpolateElement( + const component = createInterpolateElement( testString, { - a: { - tag: 'a', - props: { href: 'https://github.com', className: 'some_class' }, - }, + a: [ 'a', { href: 'https://github.com', className: 'some_class' } ], } - ) ).toEqual( expectedElement ); + ); + expect( + JSON.stringify( component ) + ).toEqual( + JSON.stringify( expectedElement ) + ); } ); it( 'returns expected react element for nested components', () => { const testString = 'This is a string that is linked.'; @@ -73,48 +93,33 @@ describe( 'createInterpolateElement', () => { 'This is a ', createElement( 'a', - { key: 0 }, + { key: 1 }, [ 'string that is ', createElement( 'em', - { key: 1 }, - 'linked' + { key: 0 }, + [ 'linked' ] ), ] ), '.', ] ); - expect( createInterpolateElement( + expect( JSON.stringify( createInterpolateElement( testString, { - a: { tag: 'a', props: {} }, - em: { tag: 'em', props: {} }, + a: [ 'a' ], + em: [ 'em' ], } - ) ).toEqual( expectedElement ); - } ); - it( 'returns a value for a prop value type token replacement', () => { - const testString = 'This is a string with a value token: '; - const expectedElement = createElement( - Fragment, - {}, - [ - 'This is a string with a value token: ', - 10, - ] - ); - expect( createInterpolateElement( - testString, - { someValue: { value: 10 } } - ) ).toEqual( expectedElement ); + ) ) ).toEqual( JSON.stringify( expectedElement ) ); } ); it( 'returns expected output for a custom component with children ' + 'replacement', () => { const TestComponent = ( props ) => { return
{ props.children }
; }; - const testString = 'This is a string with a Custom Component'; + const testString = 'This is a string with a Custom Component'; const expectedElement = createElement( Fragment, {}, @@ -123,22 +128,22 @@ describe( 'createInterpolateElement', () => { createElement( TestComponent, { key: 0 }, - 'Custom Component' + [ 'Custom Component' ] ), ] ); - expect( createInterpolateElement( + expect( JSON.stringify( createInterpolateElement( testString, { - span: { tag: TestComponent, props: {} }, + TestComponent: [ TestComponent ], } - ) ).toEqual( expectedElement ); + ) ) ).toEqual( JSON.stringify( expectedElement ) ); } ); it( 'returns expected output for self closing custom component', () => { const TestComponent = ( props ) => { return
; }; - const testString = 'This is a string with a self closing custom component: '; + const testString = 'This is a string with a self closing custom component: '; const expectedElement = createElement( Fragment, {}, @@ -150,57 +155,51 @@ describe( 'createInterpolateElement', () => { ), ] ); - expect( createInterpolateElement( + expect( JSON.stringify( createInterpolateElement( testString, { - span: { tag: TestComponent, props: {} }, + TestComponent: [ TestComponent ], } - ) ).toEqual( expectedElement ); + ) ) ).toEqual( JSON.stringify( expectedElement ) ); } ); it( 'returns expected output for complex replacement', () => { const TestComponent = ( props ) => { return
; }; - const testString = 'This is a complex string having a value, with ' + - 'a nested link and value: '; + const testString = 'This is a complex string with ' + + 'a nested emphasized string link and value: '; const expectedElement = createElement( Fragment, {}, [ - 'This is a complex string having a ', - 'concrete', - ' value, with a ', + 'This is a complex string with a ', createElement( 'a', - { key: 0 }, + { key: 1 }, [ 'nested ', createElement( 'em', - { key: 1 }, - 'value' + { key: 0 }, + [ 'emphasized string' ] ), ' link', ] ), ' and value: ', createElement( - Fragment, - { key: 2 }, - , + TestComponent, + { key: 2 } ), ] ); expect( JSON.stringify( createInterpolateElement( testString, { - TestComponent: { value: }, - concrete: { value: 'concrete' }, - em1: { tag: 'em', props: {} }, - value: { value: 'value' }, - a1: { tag: 'a', props: {} }, + TestComponent: [ TestComponent ], + em1: [ 'em' ], + a1: [ 'a' ], } ) ) ).toEqual( JSON.stringify( expectedElement ) ); } ); - // test complex multi types replacements. } );