/** * Matches block comment delimiters * * While most of this pattern is straightforward the attribute parsing * incorporates a tricks to make sure we don't choke on specific input * * - since JavaScript has no possessive quantifier or atomic grouping * we are emulating it with a trick * * we want a possessive quantifier or atomic group to prevent backtracking * on the `}`s should we fail to match the remainder of the pattern * * we can emulate this with a positive lookahead and back reference * (a++)*c === ((?=(a+))\1)*c * * let's examine an example: * - /(a+)*c/.test('aaaaaaaaaaaaad') fails after over 49,000 steps * - /(a++)*c/.test('aaaaaaaaaaaaad') fails after 85 steps * - /(?>a+)*c/.test('aaaaaaaaaaaaad') fails after 126 steps * * this is because the possessive `++` and the atomic group `(?>)` * tell the engine that all those `a`s belong together as a single group * and so it won't split it up when stepping backwards to try and match * * if we use /((?=(a+))\1)*c/ then we get the same behavior as the atomic group * or possessive and prevent the backtracking because the `a+` is matched but * not captured. thus, we find the long string of `a`s and remember it, then * reference it as a whole unit inside our pattern * * @see http://instanceof.me/post/52245507631/regex-emulate-atomic-grouping-with-lookahead * @see http://blog.stevenlevithan.com/archives/mimic-atomic-groups * @see https://javascript.info/regexp-infinite-backtracking-problem * * once browsers reliably support atomic grouping or possessive * quantifiers natively we should remove this trick and simplify * * @type {RegExp} * * @since 3.8.0 * @since 4.6.1 added optimization to prevent backtracking on attribute parsing */ const tokenizer = /<!--\s+(\/)?wp:([a-z][a-z0-9_-]*\/)?([a-z][a-z0-9_-]*)\s+({(?:(?=([^}]+|}+(?=})|(?!}\s+\/?-->)[^])*)\5|[^]*?)}\s+)?(\/)?-->/g;
/**
 * Parses the next token in the input document.
 *
 * Reads one token via `nextToken()` and, depending on its type, flushes
 * freeform HTML, emits a block, opens a new stack frame, or closes the
 * innermost open frame. The shared parser state (`stack`, `output`,
 * `offset`) is updated in place.
 *
 * @return {boolean} Returns true when there are more tokens to parse.
 */
function proceed() {
	const stackDepth = stack.length;
	const [tokenType, blockName, attrs, startOffset, tokenLength] = nextToken();

	// We may have some HTML soup before the next block.
	const leadingHtmlStart = startOffset > offset ? offset : null;

	// Position of the first character after the current token.
	const tokenEnd = startOffset + tokenLength;

	switch (tokenType) {
		case 'no-more-tokens': {
			// If not in a block then flush output.
			if (0 === stackDepth) {
				addFreeform();
				return false;
			}

			// Otherwise we have a problem
			// This is an error
			// we have options
			//  - treat it all as freeform text
			//  - assume an implicit closer (easiest when not nesting)

			// For the easy case we'll assume an implicit closer.
			if (1 === stackDepth) {
				addBlockFromStack();
				return false;
			}

			// For the nested case where it's more difficult we'll
			// have to assume that multiple closers are missing
			// and so we'll collapse the whole stack piecewise.
			while (0 < stack.length) {
				addBlockFromStack();
			}
			return false;
		}

		case 'void-block': {
			const voidBlock = Block(blockName, attrs, [], '', []);

			// Easy case is if we stumbled upon a void block
			// in the top-level of the document.
			if (0 === stackDepth) {
				if (null !== leadingHtmlStart) {
					output.push(Freeform(document.slice(leadingHtmlStart, startOffset)));
				}
				output.push(voidBlock);
				offset = tokenEnd;
				return true;
			}

			// Otherwise we found an inner block.
			addInnerBlock(voidBlock, startOffset, tokenLength);
			offset = tokenEnd;
			return true;
		}

		case 'block-opener': {
			// Track all newly-opened blocks on the stack.
			stack.push(
				Frame(
					Block(blockName, attrs, [], '', []),
					startOffset,
					tokenLength,
					tokenEnd,
					leadingHtmlStart
				)
			);
			offset = tokenEnd;
			return true;
		}

		case 'block-closer': {
			// If we're missing an opener we're in trouble
			// This is an error.
			if (0 === stackDepth) {
				// We have options
				//  - assume an implicit opener
				//  - assume _this_ is the opener
				//  - give up and close out the document.
				addFreeform();
				return false;
			}

			// If we're not nesting then this is easy - close the block.
			if (1 === stackDepth) {
				addBlockFromStack(startOffset);
				offset = tokenEnd;
				return true;
			}

			// Otherwise we're nested and we have to close out the current
			// block and add it as an innerBlock to the parent.
			const stackTop = /** @type {ParsedFrame} */ (stack.pop());

			// Everything between the last consumed position of this frame
			// and the closer is trailing HTML that belongs to the block.
			const html = document.slice(stackTop.prevOffset, startOffset);
			stackTop.block.innerHTML += html;
			stackTop.block.innerContent.push(html);
			stackTop.prevOffset = tokenEnd;

			addInnerBlock(stackTop.block, stackTop.tokenStart, stackTop.tokenLength, tokenEnd);
			offset = tokenEnd;
			return true;
		}

		default:
			// This is an error.
			addFreeform();
			return false;
	}
}
/**
 * Decodes a JSON string, yielding `null` instead of throwing on bad input.
 *
 * The JSON handed over from block comment delimiters is expected to
 * describe an object, never a bare value such as `true` or `null`.
 * This function does not enforce that itself; it returns whatever
 * `JSON.parse` produces.
 *
 * @param {string} input JSON input string to parse
 * @return {Object|null} parsed JSON if valid, otherwise null
 */
function parseJSON(input) {
	let parsed;

	try {
		parsed = JSON.parse(input);
	} catch (error) {
		// Malformed JSON is an expected outcome here, not a failure:
		// report it to the caller as `null`.
		parsed = null;
	}

	return parsed;
}
/**
 * Finds the next token in the document.
 *
 * Runs the shared `tokenizer` pattern against `document`. Because the
 * pattern is global, each call resumes where the previous match ended.
 *
 * The returned tuple is
 * `[ tokenType, blockName, attrs, startOffset, tokenLength ]`, where
 * `tokenType` is one of `'no-more-tokens'`, `'void-block'`,
 * `'block-opener'` or `'block-closer'`.
 *
 * @return {Token} The next matched token.
 */
function nextToken() {
	// Aye the magic
	// we're using a single RegExp to tokenize the block comment delimiters
	// we're also using a trick here because the only difference between a
	// block opener and a block closer is the leading `/` before `wp:` (and
	// a closer has no attributes). we can trap them both and process the
	// match back in JavaScript to see which one it was.
	const matches = tokenizer.exec(document);

	// We have no more tokens.
	if (null === matches) {
		return ['no-more-tokens', '', null, 0, 0];
	}

	const startedAt = matches.index;
	const [
		match,
		closerMatch,
		namespaceMatch,
		nameMatch,
		attrsMatch,
		, // Internal capture used only by the pattern's back reference.
		voidMatch,
	] = matches;

	const length = match.length;
	const isCloser = !!closerMatch;
	const isVoid = !!voidMatch;
	// NOTE(review): blocks without an explicit namespace are assumed to
	// live in `core/` - confirm against the reference implementation.
	const namespace = namespaceMatch || 'core/';
	const name = namespace + nameMatch;
	const hasAttrs = !!attrsMatch;
	const attrs = hasAttrs ? parseJSON(attrsMatch) : {};

	// A closer that is also void or carries attributes isn't allowed.
	// This is an error, but we can ignore the extras since they don't
	// hurt anything; we may warn against this at some point or reject it.

	if (isVoid) {
		return ['void-block', name, attrs, startedAt, length];
	}

	if (isCloser) {
		return ['block-closer', name, null, startedAt, length];
	}

	return ['block-opener', name, attrs, startedAt, length];
}
/**
 * Appends a freeform block holding document text that starts at the
 * current `offset`.
 *
 * When `rawLength` is omitted (or otherwise falsy) the block spans
 * everything from `offset` to the end of the document. Nothing is
 * appended when the resulting length is zero.
 *
 * @param {number} [rawLength] Number of characters to take from `offset`.
 */
function addFreeform(rawLength) {
	// Fall back to "the rest of the document" when no length was given.
	const length = rawLength || document.length - offset;

	if (length !== 0) {
		const html = document.substr(offset, length);
		output.push(Freeform(html));
	}
}