// Stolen from here: https://stackoverflow.com/a/53925033
// We changed the # to // though.
/**
 * Builds a RegExp from an "extended" (free-spacing) pattern string, so long
 * patterns can be written across several lines with comments.
 *
 * Before compiling, the pattern is cleaned up:
 *   - everything from an unescaped `//` to the end of that line is removed;
 *   - every run of unescaped whitespace is removed.
 *
 * A backslash always escapes exactly the one character after it, and the pair
 * is copied through untouched. That keeps `\ ` (literal space) and `\/\/`
 * (literal slashes), and it means `\\` is one complete escape: whitespace or
 * a comment following an escaped backslash is stripped like anywhere else.
 *
 * @param {string} inputPatternStr - Pattern source in extended syntax.
 * @param {string} [flags] - Flags handed to the RegExp constructor as-is.
 * @returns {RegExp} The compiled regular expression.
 * @throws {SyntaxError} If the cleaned pattern (or the flags) is invalid.
 */
module.exports.makeExtendedRegExp = (inputPatternStr, flags) => {
    // One left-to-right pass. The escape alternative comes first so that a
    // backslash pair is consumed as a unit; looking only at the previous
    // character (as before) mistakes the second half of `\\` for an escape.
    const cleanedPatternStr = inputPatternStr.replace(
        /\\[\s\S]|\/\/.*|\s+/g,
        token => (token[0] === '\\' ? token : ''));
    return new RegExp(cleanedPatternStr, flags);
};
/**
 * Parses inline markup into a flat list of nodes.
 *
 * Grammar, as implemented below:
 *   [[value]]
 *   [[key:value]]
 *   [[key:value*argName=argValue*...|label]]
 * A backslash makes the next character literal. Replacer values and argument
 * values may themselves contain tags; keys, argument names and labels are
 * text only.
 *
 * Node shapes:
 *   {i, type: 'text', string}
 *   {i, type: 'tag', replacerKey, replacerValue, args, label}
 * where `i` is the index in `input` at which the node starts, `replacerKey`
 * is a text node or null, `replacerValue` is an array of nodes, `args` is an
 * array of {key, value} (text node, array of nodes) and `label` is a text
 * node or undefined.
 *
 * NOTE(review): the first lines of this function (the signature and the head
 * of `makeNode`) are missing from the patch text; they were reconstructed
 * from the `transformInline.parse(input)` call sites and from the way
 * `makeNode` is called. Confirm against the repository.
 *
 * @param {string} input - Text to parse.
 * @returns {Array<object>} Top-level nodes, in source order.
 * @throws {SyntaxError} On malformed markup; the message carries the position.
 */
transformInline.parse = function(input) {
    const makeNode = (i, type, props) => ({i, type, ...props});
    const makeError = (i, message) => makeNode(i, 'error', {message});
    const endOfInput = (i, comment) => makeError(i, `Unexpected end of input (${comment}).`);

    // A matcher takes a position and returns a match record (or null). The
    // record points back at its matcher through `fn`, so callers can tell
    // which delimiter ended a parse by comparing `fn` identities.
    const matchLiteral = str => {
        const fn = i =>
            (input.startsWith(str, i)
                ? {iMatch: i, iParse: i + str.length, match: str, fn}
                : null);
        fn.literal = str;
        return fn;
    };

    // First matcher that succeeds at the position wins.
    const matchAny = (...fns) => i => {
        for (const fn of fns) {
            const result = fn(i);
            if (result) return result;
        }
        return null;
    };

    // Syntax literals. Built once per parse and shared by every nested call,
    // instead of being rebuilt each time parseNodes recurses.
    const tagBeginning = matchLiteral('[[');
    const tagEnding = matchLiteral(']]');
    const tagReplacerValue = matchLiteral(':');
    const tagArgument = matchLiteral('*');
    const tagArgumentValue = matchLiteral('=');
    const tagLabel = matchLiteral('|');

    // Delimiter sets that can end each part of a tag.
    const endsReplacerKey = matchAny(tagReplacerValue, tagArgument, tagLabel, tagEnding);
    const endsValue = matchAny(tagArgument, tagLabel, tagEnding);
    const endsArgumentKey = matchAny(tagArgumentValue, tagArgument, tagLabel, tagEnding);

    // Parses text-only content and folds it into a single node (or null when
    // there was no text). Also returns where parsing stopped and the closer.
    const parseOneTextNode = function(i, opts) {
        const { nodes, i: newI, ...rest } = parseNodes(i, {
            ...opts,
            textOnly: true
        });

        return {
            node: (
                nodes.length === 0 ? null :
                nodes.length === 1 ? nodes[0] :
                makeNode(i, 'text', {
                    string: nodes.map(node => node.string).join(' ')
                })),
            i: newI, // where parsing stopped, not where it began
            ...rest
        };
    };

    // Parses one tag whose opening `[[` started at iTag; iStart is the index
    // just past that opener. Returns the tag node and the index after `]]`.
    const parseTag = function(iTag, iStart) {
        let i = iStart;

        // Replacer key (or value)

        const first = parseOneTextNode(i, {closerFn: endsReplacerKey});

        if (!first.closerResult) throw endOfInput(i, `reading replacer key`);

        if (!first.node) {
            throw makeError(i, first.closerResult.fn === tagEnding
                ? `Expected text (replacer key/value).`
                : `Expected text (replacer key).`);
        }

        let closer = first.closerResult;
        i = closer.iParse;

        // Value is an array of nodes, but key is just one (or null). So with
        // no explicit `:value`, the first part is the value and is wrapped in
        // an array on its own.
        let replacerKey = null;
        let replacerValue = [first.node];

        // Replacer value (if explicit)

        if (closer.fn === tagReplacerValue) {
            const second = parseNodes(i, {closerFn: endsValue});

            if (!second.closerResult) throw endOfInput(i, `reading replacer value`);
            // An empty array is truthy, so test the length explicitly.
            if (!second.nodes.length) throw makeError(i, `Expected content (replacer value).`);

            replacerKey = first.node;
            replacerValue = second.nodes;
            closer = second.closerResult;
            i = closer.iParse;
        }

        // Arguments

        const args = [];

        while (closer.fn === tagArgument) {
            const keyParse = parseOneTextNode(i, {closerFn: endsArgumentKey});

            if (!keyParse.closerResult) throw endOfInput(i, `reading argument key`);

            if (keyParse.closerResult.fn !== tagArgumentValue)
                throw makeError(i, `Expected ${tagArgumentValue.literal} (tag argument).`);

            if (!keyParse.node)
                throw makeError(i, `Expected text (argument key).`);

            i = keyParse.closerResult.iParse;

            const valueParse = parseNodes(i, {closerFn: endsValue});

            if (!valueParse.closerResult) throw endOfInput(i, `reading argument value`);
            if (!valueParse.nodes.length) throw makeError(i, `Expected content (argument value).`);

            closer = valueParse.closerResult;
            i = closer.iParse;

            args.push({key: keyParse.node, value: valueParse.nodes});
        }

        // Label

        let label;

        if (closer.fn === tagLabel) {
            const labelParse = parseOneTextNode(i, {closerFn: tagEnding});

            if (!labelParse.closerResult) throw endOfInput(i, `reading label`);
            if (!labelParse.node) throw makeError(i, `Expected text (label).`);

            label = labelParse.node;
            i = labelParse.closerResult.iParse;
        }

        return {
            node: makeNode(iTag, 'tag', {replacerKey, replacerValue, args, label}),
            i
        };
    };

    // Parses nodes from position i until closerFn matches or input runs out.
    // Returns {nodes, i, closerResult}; closerResult is absent when the end
    // of input was reached first. With textOnly set, meeting a tag is an error.
    const parseNodes = function(i, {
        closerFn = null,
        textOnly = false
    } = {}) {
        const nodes = [];
        let string = '';
        let iString = i;

        // Remember where the pending text run began, so the resulting text
        // node reports its real position.
        const appendText = (text, iStart) => {
            if (!string.length) iString = iStart;
            string += text;
        };

        const pushTextNode = () => {
            if (string.length) {
                nodes.push(makeNode(iString, 'text', {string}));
                string = '';
            }
        };

        while (i < input.length) {
            // A backslash escapes exactly one character. Checked before the
            // closer so that delimiters can be escaped. A trailing backslash
            // with nothing after it is dropped.
            if (input[i] === '\\') {
                const iEscape = i;
                i++;
                if (i < input.length) {
                    appendText(input[i], iEscape);
                    i++;
                }
                continue;
            }

            const closerResult = closerFn && closerFn(i);
            if (closerResult) {
                pushTextNode();
                return {nodes, i, closerResult};
            }

            const opener = tagBeginning(i);
            if (!opener) {
                appendText(input[i], i);
                i++;
                continue;
            }

            if (textOnly)
                throw makeError(i, `Unexpected [[tag]] - expected only text here.`);

            pushTextNode();

            const tag = parseTag(opener.iMatch, opener.iParse);
            nodes.push(tag.node);
            i = tag.i;
        }

        pushTextNode();
        return {nodes, i};
    };

    try {
        return parseNodes(0).nodes;
    } catch (errorNode) {
        // Only the parser's own error nodes are translated; anything else is
        // a real bug and is rethrown untouched.
        if (!errorNode || errorNode.type !== 'error') {
            throw errorNode;
        }

        const { i, message } = errorNode;

        // TODO: Visual line/surrounding characters presentation!
        throw new SyntaxError(`Parse error (at pos ${i}): ${message}`);
    }
};