Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP: Adds new RegExp-based "parser" #478

Closed
wants to merge 4 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
193 changes: 192 additions & 1 deletion blocks/api/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -220,4 +220,195 @@ export function parseWithGrammar( content ) {
}, [] );
}

export default parseWithTinyMCE;
/**
 * Matches opening block comments
 *
 *     <!-- wp:block/type arg:14 value:something -->
 *
 * Capture group 1 is the block type, capture group 2 is the raw
 * attribute string (everything between the whitespace following
 * the type and the closing `-->`, on a single line).
 *
 * A block type starts with a letter and continues with letters,
 * digits and `/` separators; every `/` must be directly followed
 * by a letter or digit, so `core/text` and `my/block` are valid
 * while `core//text` and `core/` are not.
 *
 * This includes the global flag so that we can
 * track not only where the match begins but also
 * where it ends with the `.lastIndex` property.
 * Because of that flag the RegExp is stateful: reset
 * `.lastIndex` to 0 before every independent `exec()`.
 *
 * @type {RegExp}
 */
const blockOpenerPattern = /<!--\s*wp:([a-z](?:\/?[a-z0-9])*)\s+((?:(?!-->).)*)-->/ig;

/**
 * Matches closing block comments
 *
 *     <!-- /wp:block/type -->
 *
 * Capture group 1 is the block type and follows the same naming
 * rules as the opening comment so that every block which can be
 * opened can also be closed.
 *
 * This includes the global flag so that we can
 * track not only where the match begins but also
 * where it ends with the `.lastIndex` property.
 * Because of that flag the RegExp is stateful: reset
 * `.lastIndex` to 0 before every independent `exec()`.
 *
 * @type {RegExp}
 */
const blockCloserPattern = /<!--\s*\/wp:([a-z](?:\/?[a-z0-9])*)\s+-->/ig;

/**
 * Splits a string once, at the first occurrence of a delimiter
 *
 * Later occurrences of the delimiter are kept verbatim in the
 * second part, e.g. splitting "url:https://example.com" at ":"
 * yields [ "url", "https://example.com" ].
 *
 * If the delimiter does not occur the whole input is returned as
 * the first part and the second part is the empty string.
 *
 * @param {String} delimiter literal string at which to split
 * @param {String} s         input string to split
 * @returns {[String,String]} [part before delimiter, part after delimiter]
 */
function splitAt( delimiter, s ) {
	const [ head, ...tail ] = s.split( delimiter );

	// re-insert the delimiter between the trailing pieces so that only
	// the first occurrence is consumed by the split
	return [ head, tail.join( delimiter ) ];
}

/**
 * Converts the raw attribute portion of a block comment into
 * an object of key/value pairs
 *
 * The input is trimmed and cut into whitespace-separated tokens;
 * each token is handed to `splitAt` with ":" as the delimiter to
 * obtain a [ name, value ] pair. Tokens that yield an empty name
 * (which includes the single empty token produced by an empty
 * input) are ignored.
 *
 * Note: When a name appears more than once the last
 *       occurrence provides the value in the returned object.
 *
 * @param {String} attrs e.g. " id:14 url:https://s0.wp.com/thing "
 * @returns {Object<String,String>} key/value pairs of attributes
 */
function regexParseAttrs( attrs ) {
	const tokens = attrs.trim().split( /\s+/ );
	let parsed = {};

	for ( let i = 0; i < tokens.length; i++ ) {
		const [ key, val ] = splitAt( ':', tokens[ i ] );

		// nothing usable in front of the delimiter
		if ( ! key ) {
			continue;
		}

		parsed = { ...parsed, [ key ]: val };
	}

	return parsed;
}

/**
 * Parses the post content with a RegExp based parser
 * and returns a list of block data structures
 *
 * Each step looks for the first block-opening and the first
 * block-closing comment in `content` and then does one of:
 *
 *  - neither found: `content` is trailing free-form text. It is
 *    returned as the remainder (not pushed into the output) so the
 *    caller can append it as a free-form block exactly once.
 *  - closer comes first: the text in front of it becomes the
 *    `rawContent` of the currently open block, which is appended
 *    to the output; parsing continues after the closer.
 *  - opener comes first: non-empty text in front of it (when no
 *    block is open) is appended to the output as a free-form
 *    block, the opener's type and attributes are remembered as the
 *    open block, and parsing continues after the opener.
 *
 * Every continuing step is returned as a thunk instead of being
 * called directly, so the function has to be driven by a
 * trampoline (call the result for as long as it is a function).
 *
 * @TODO: This messes up nested nodes: an opener found while a
 *        block is already open replaces that block and the text
 *        collected for it is dropped. Fix by handling children
 *        in another accumulator and merging on close to preserve
 *        the tail-call recursion. Note that blocks are not yet
 *        nested so right now it's not a problem.
 *
 * @example
 * // input: A(Sub)Expr   where "(" opens a block and ")" closes it
 * //
 * // content       output        openBlock
 * // A(Sub)Expr    []            null
 * // Sub)Expr      [{A}]         {..}
 * // Expr          [{A}{Sub}]    null
 * //
 * // result: [ [{A}{Sub}], 'Expr' ]
 *
 * @param {String}        content      running post content to parse
 * @param {Array<Object>} [output=[]]  running total output of parser
 * @param {String}        [remaining=''] value handed back as the remainder when `content` is empty
 * @param {?Object}       openBlock    partial block information to carry along if opening a block
 * @throws {SyntaxError} when a closing comment appears while no block is open
 * @throws {SyntaxError} when the content ends while a block is still open
 * @returns {Array|Function} pair of [ parsed blocks, remaining free-form content ],
 *                           or a continuation thunk for recursion
 */
export function regExpParser( content, output = [], remaining = '', openBlock = null ) {
	// nothing left to scan
	if ( ! content ) {
		if ( openBlock ) {
			throw new SyntaxError( 'Cannot leave a block unclosed' );
		}

		return [ output, remaining ];
	}

	blockOpenerPattern.lastIndex = 0; // RegExp with `g` flag (which we need to read lastIndex)
	blockCloserPattern.lastIndex = 0; // must be reset or we will get skewed indices
	const firstOpen = blockOpenerPattern.exec( content );
	const firstClose = blockCloserPattern.exec( content );

	// no blocks at all: everything left is trailing free-form content
	if ( ! firstOpen && ! firstClose ) {
		if ( openBlock ) {
			throw new SyntaxError( 'Cannot leave a block unclosed' );
		}

		return [ output, content ];
	}

	const closesFirst = !! firstClose && ( ! firstOpen || firstClose.index < firstOpen.index );

	if ( closesFirst ) {
		// closing a non-existent block
		if ( ! openBlock ) {
			throw new SyntaxError( 'Cannot close a block that isn\'t open' );
		}

		// closing an existing block
		// (read lastIndex now; the patterns are shared, stateful objects)
		const afterCloser = content.slice( blockCloserPattern.lastIndex );
		const closedBlock = { ...openBlock, rawContent: content.slice( 0, firstClose.index ) };

		return () => regExpParser( afterCloser, output.concat( closedBlock ), afterCloser, null );
	}

	// open a block
	const [ /* fullMatch */, blockType, rawAttrs ] = firstOpen;
	const attrs = regexParseAttrs( rawAttrs );
	const beforeOpener = content.slice( 0, firstOpen.index );
	const afterOpener = content.slice( blockOpenerPattern.lastIndex );

	// text sitting between blocks is free-form content and must not be lost
	const nextOutput = ( ! openBlock && beforeOpener ) ?
		output.concat( { attrs: {}, rawContent: beforeOpener } ) :
		output;

	return () => regExpParser( afterOpener, nextOutput, afterOpener, { blockType, attrs } );
}

/**
 * Drives a thunk-returning ("trampolined") computation to
 * completion without growing the call stack
 *
 * As long as the current value is a function it is invoked with
 * no arguments and replaced by whatever it returns; the first
 * non-function value ends the loop and is returned.
 *
 * Consequently this helper can never produce a function as its
 * final result. A computation that must eventually return a
 * function needs a different protocol, e.g. always returning a
 * pair: [ next thunk or stop, value of accumulators ]
 *
 * This form was picked because it is simple to implement and
 * read and keeps noise out of the wrapped functions.
 *
 * @see https://en.wikipedia.org/wiki/Tail_call#Through_trampolining
 *
 * @example
 * // stack overflow
 * const factorial = ( n, a = 1 ) => n > 1 ? factorial( n - 1, n * a ) : a;
 * factorial( 20000 );
 *
 * // safe
 * const factorial = ( n, a = 1 ) => n > 1 ? () => factorial( n - 1, n * a ) : a;
 * simpleTrampoline( factorial( 20000 ) );
 *
 * @param {*} f thunk to start bouncing on, or a non-function value
 * @returns {*} first non-function value produced
 */
function simpleTrampoline( f ) {
	let bounced = f;

	for ( ; typeof bounced === 'function'; ) {
		bounced = bounced();
	}

	return bounced;
}

/**
 * Parses post content into a list of blocks using the
 * RegExp-based parser
 *
 * Runs `regExpParser` through `simpleTrampoline`, appends the
 * returned remainder as one more attribute-less node, and passes
 * every node to `createBlockWithFallback`. Only truthy results
 * of that call end up in the returned list.
 *
 * NOTE(review): `createBlockWithFallback` is defined outside the
 * visible part of this file; presumably it returns a falsy value
 * when no block can be built for a node - confirm.
 *
 * @param {String} content post content to parse
 * @returns {Array} blocks created from the parsed content
 */
export function parseWithRegExp( content ) {
	const [ doc, remaining ] = simpleTrampoline( regExpParser( content ) );
	const nodes = doc.concat( { attrs: {}, rawContent: remaining } );
	const blocks = [];

	nodes.forEach( ( { blockType, rawContent, attrs } ) => {
		const block = createBlockWithFallback( blockType, rawContent, attrs );

		if ( block ) {
			blocks.push( block );
		}
	} );

	return blocks;
}

export default parseWithRegExp;