From 7b1ad75b0e7f9adfb397dda1ad28941cf319bef8 Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Thu, 15 Apr 2021 12:32:17 -0300 Subject: new parser progress so far (cur: 15k/sec) not implemented into the site (so just process.exit()), includes a couple examples and speedtest (not optimized at all yet) --- upd8-util.js | 11 +++ upd8.js | 274 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 284 insertions(+), 1 deletion(-) diff --git a/upd8-util.js b/upd8-util.js index abeed6c..30260f8 100644 --- a/upd8-util.js +++ b/upd8-util.js @@ -416,3 +416,14 @@ module.exports.promisifyProcess = function(proc, showLogging = true) { // Stolen from jq! Which pro8a8ly stole the concept from other places. Nice. module.exports.withEntries = (obj, fn) => Object.fromEntries(fn(Object.entries(obj))); + +// Stolen from here: https://stackoverflow.com/a/53925033 +// We changed the # to // though. +module.exports.makeExtendedRegExp = (inputPatternStr, flags) => { + // Remove everything between the first unescaped `//` and the end of a line + // and then remove all unescaped whitespace + const cleanedPatternStr = inputPatternStr + .replace(/(^|[^\\])\/\/.*/g, '$1') + .replace(/(^|[^\\])\s+/g, '$1'); + return new RegExp(cleanedPatternStr, flags); +}; diff --git a/upd8.js b/upd8.js index 66582f1..d0857e2 100755 --- a/upd8.js +++ b/upd8.js @@ -117,6 +117,7 @@ const { logWarn, logInfo, logError, + makeExtendedRegExp, parseOptions, progressPromiseAll, queue, @@ -930,6 +931,11 @@ const replacerSpec = { search: 'staticPage', link: 'staticPage' }, + 'string': { + search: null, + value: ref => ref, + html: (ref, {strings, args}) => strings(ref, args) + }, 'tag': { search: 'tag', link: 'tag' @@ -955,7 +961,273 @@ const replacerSpec = { if (error) process.exit(); const categoryPart = Object.keys(replacerSpec).join('|'); - transformInline.regexp = new RegExp(String.raw`(? 
({i, type, ...props}); + const makeError = (i, message) => makeNode(i, 'error', {message}); + const endOfInput = (i, comment) => makeError(i, `Unexpected end of input (${comment}).`); + + const parseOneTextNode = function(i, opts) { + const { nodes, i: newI, ...rest } = parseNodes(i, { + ...opts, + textOnly: true + }); + + return { + node: ( + nodes.length === 0 ? null : + nodes.length === 1 ? nodes[0] : + makeNode(i, 'text', { + string: nodes.map(node => node.string).join(' ') + })), + i, + ...rest + }; + }; + + const parseNodes = function(i, { + closerFn = null, + textOnly = false + } = {}) { + let nodes = []; + let escapeNext = false; + let string = ''; + let iString = 0; + + const matchLiteral = str => { + const fn = i => + (input.slice(i, i + str.length) === str + ? {iMatch: i, iParse: i + str.length, match: str, fn} + : null); + fn.literal = str; + return fn; + }; + + const matchAny = (...fns) => i => { + if (!fns.length) return null; + const result = fns[0](i); + if (result) return result; + return matchAny(...fns.slice(1))(i); + }; + + // Syntax literals. 
+ const tagBeginning = matchLiteral('[['); + const tagEnding = matchLiteral(']]'); + const tagReplacerValue = matchLiteral(':'); + const tagArgument = matchLiteral('*'); + const tagArgumentValue = matchLiteral('='); + const tagLabel = matchLiteral('|'); + + const pushNode = (...args) => nodes.push(makeNode(...args)); + const pushTextNode = () => { + if (string.length) { + pushNode(iString, 'text', {string}); + string = ''; + } + }; + + while (i < input.length) { + let match; + + if (escapeNext) { + string += input[i]; + i++; + continue; + } + + if (input[i] === '\\') { + escapeNext = true; + i++; + continue; + } + + const closerResult = closerFn && closerFn(i); + if (closerResult) { + pushTextNode(); + return {nodes, i, closerResult}; + } + + if (match = tagBeginning(i)) { + if (textOnly) + throw makeError(i, `Unexpected [[tag]] - expected only text here.`); + + pushTextNode(); + + i = match.iParse; + + const iTag = match.iMatch; + + let P, // parse + N, // node + M; // match + const loadResults = result => { + P = result; + N = P.node || P.nodes; + M = P.closerResult; + }; + + // Replacer key (or value) + + loadResults(parseOneTextNode(i, { + closerFn: matchAny(tagReplacerValue, tagArgument, tagLabel, tagEnding) + })); + + if (!M) throw endOfInput(i, `reading replacer key`); + + if (!N) { + switch (M.fn) { + case tagReplacerValue: + case tagArgument: + case tagLabel: + throw makeError(i, `Expected text (replacer key).`); + case tagEnding: + throw makeError(i, `Expected text (replacer key/value).`); + } + } + + const replacerFirst = N; + i = M.iParse; + + // Replacer value (if explicit) + + let replacerSecond; + + if (M.fn === tagReplacerValue) { + loadResults(parseNodes(i, { + closerFn: matchAny(tagArgument, tagLabel, tagEnding) + })); + + if (!M) throw endOfInput(i, `reading replacer value`); + if (!N) throw makeError(i, `Expected content (replacer value).`); + + replacerSecond = N; + i = M.iParse + } + + // Assign first & second to replacer key/value + + // 
Value is an array of nodes, 8ut key is just one (or null). + // So if we use replacerFirst as the value, we need to stick + // it in an array (on its own). + const [ replacerKey, replacerValue ] = + (replacerSecond + ? [replacerFirst, replacerSecond] + : [null, [replacerFirst]]); + + // Arguments + + const args = []; + + while (M.fn === tagArgument) { + loadResults(parseOneTextNode(i, { + closerFn: matchAny(tagArgumentValue, tagArgument, tagLabel, tagEnding) + })); + + if (!M) throw endOfInput(i, `reading argument key`); + + if (M.fn !== tagArgumentValue) + throw makeError(i, `Expected ${tagArgumentValue.literal} (tag argument).`); + + if (!N) + throw makeError(i, `Expected text (argument key).`); + + const key = N; + i = M.iParse; + + loadResults(parseNodes(i, { + closerFn: matchAny(tagArgument, tagLabel, tagEnding) + })); + + if (!M) throw endOfInput(i, `reading argument value`); + if (!N) throw makeError(i, `Expected content (argument value).`); + + const value = N; + i = M.iParse; + + args.push({key, value}); + } + + let label; + + if (M.fn === tagLabel) { + loadResults(parseOneTextNode(i, { + closerFn: matchAny(tagEnding) + })); + + if (!M) throw endOfInput(i, `reading label`); + if (!N) throw makeError(i, `Expected text (label).`); + + label = N; + i = M.iParse; + } + + nodes.push(makeNode(iTag, 'tag', {replacerKey, replacerValue, args, label})); + + continue; + } + + string += input[i]; + i++; + } + + pushTextNode(); + return {nodes, i}; + }; + + try { + return parseNodes(0).nodes; + } catch (errorNode) { + if (errorNode.type !== 'error') { + throw errorNode; + } + + const { i, message } = errorNode; + + // TODO: Visual line/surrounding characters presentation! 
+ throw new SyntaxError(`Parse error (at pos ${i}): ${message}`); + } + }; +} + +{ + const show = input => process.stdout.write(`-- ${input}\n` + util.inspect( + transformInline.parse(input), + { + depth: null, + colors: true + } + ) + '\n\n'); + + show(`[[album:are-you-lost|Cristata's new album]]`); + show(`[[string:content.donate.patreonLine*link=[[external:https://www.patreon.com/qznebula|Patreon]]]]`); +} + +{ + function test(input) { + let n = 0; + const start = Date.now(); + const end = start + 1000; + while (Date.now() < end) { + transformInline.parse(input); + n++; + } + console.log(`Ran ${n} times.`); + } + + test(fixWS` + [[string:content.donate.patreonLine*link=[[external:https://www.patreon.com/qznebula|Patreon]]]] + Hello, world! Wow [[album:the-beans-zone]] is some cool stuff. + `); + process.exit(); } function transformInline(text, {strings, to}) { -- cgit 1.3.0-6-gf8a5 From ff6c76f0c3ffdf43db8ab73acfb21b9a4d8da83b Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Thu, 15 Apr 2021 12:50:09 -0300 Subject: literal matching as plain strings (cur: 40k/sec) --- upd8.js | 78 +++++++++++++++++++++++++++-------------------------------------- 1 file changed, 32 insertions(+), 46 deletions(-) diff --git a/upd8.js b/upd8.js index d0857e2..baf73c0 100755 --- a/upd8.js +++ b/upd8.js @@ -995,7 +995,7 @@ const replacerSpec = { }; const parseNodes = function(i, { - closerFn = null, + stopAt = null, textOnly = false } = {}) { let nodes = []; @@ -1003,29 +1003,13 @@ const replacerSpec = { let string = ''; let iString = 0; - const matchLiteral = str => { - const fn = i => - (input.slice(i, i + str.length) === str - ? {iMatch: i, iParse: i + str.length, match: str, fn} - : null); - fn.literal = str; - return fn; - }; - - const matchAny = (...fns) => i => { - if (!fns.length) return null; - const result = fns[0](i); - if (result) return result; - return matchAny(...fns.slice(1))(i); - }; - // Syntax literals. 
- const tagBeginning = matchLiteral('[['); - const tagEnding = matchLiteral(']]'); - const tagReplacerValue = matchLiteral(':'); - const tagArgument = matchLiteral('*'); - const tagArgumentValue = matchLiteral('='); - const tagLabel = matchLiteral('|'); + const tagBeginning = '[['; + const tagEnding = ']]'; + const tagReplacerValue = ':'; + const tagArgument = '*'; + const tagArgumentValue = '='; + const tagLabel = '|'; const pushNode = (...args) => nodes.push(makeNode(...args)); const pushTextNode = () => { @@ -1036,8 +1020,6 @@ const replacerSpec = { }; while (i < input.length) { - let match; - if (escapeNext) { string += input[i]; i++; @@ -1050,41 +1032,45 @@ const replacerSpec = { continue; } - const closerResult = closerFn && closerFn(i); - if (closerResult) { - pushTextNode(); - return {nodes, i, closerResult}; + if (stopAt) { + for (const literal of stopAt) { + if (input.slice(i, i + literal.length) === literal) { + pushTextNode(); + return { + nodes, i, + stoppedAt: {iMatch: i, iParse: i + literal.length, literal} + }; + } + } } - if (match = tagBeginning(i)) { + if (input.slice(i, i + tagBeginning.length) === tagBeginning) { if (textOnly) throw makeError(i, `Unexpected [[tag]] - expected only text here.`); pushTextNode(); - - i = match.iParse; - - const iTag = match.iMatch; + const iTag = i; + i += tagBeginning.length; let P, // parse N, // node - M; // match + M; // match (stopped at) const loadResults = result => { P = result; N = P.node || P.nodes; - M = P.closerResult; + M = P.stoppedAt; }; // Replacer key (or value) loadResults(parseOneTextNode(i, { - closerFn: matchAny(tagReplacerValue, tagArgument, tagLabel, tagEnding) + stopAt: [tagReplacerValue, tagArgument, tagLabel, tagEnding] })); if (!M) throw endOfInput(i, `reading replacer key`); if (!N) { - switch (M.fn) { + switch (M.literal) { case tagReplacerValue: case tagArgument: case tagLabel: @@ -1101,9 +1087,9 @@ const replacerSpec = { let replacerSecond; - if (M.fn === tagReplacerValue) { + if 
(M.literal === tagReplacerValue) { loadResults(parseNodes(i, { - closerFn: matchAny(tagArgument, tagLabel, tagEnding) + stopAt: [tagArgument, tagLabel, tagEnding] })); if (!M) throw endOfInput(i, `reading replacer value`); @@ -1127,14 +1113,14 @@ const replacerSpec = { const args = []; - while (M.fn === tagArgument) { + while (M.literal === tagArgument) { loadResults(parseOneTextNode(i, { - closerFn: matchAny(tagArgumentValue, tagArgument, tagLabel, tagEnding) + stopAt: [tagArgumentValue, tagArgument, tagLabel, tagEnding] })); if (!M) throw endOfInput(i, `reading argument key`); - if (M.fn !== tagArgumentValue) + if (M.literal !== tagArgumentValue) throw makeError(i, `Expected ${tagArgumentValue.literal} (tag argument).`); if (!N) @@ -1144,7 +1130,7 @@ const replacerSpec = { i = M.iParse; loadResults(parseNodes(i, { - closerFn: matchAny(tagArgument, tagLabel, tagEnding) + stopAt: [tagArgument, tagLabel, tagEnding] })); if (!M) throw endOfInput(i, `reading argument value`); @@ -1158,9 +1144,9 @@ const replacerSpec = { let label; - if (M.fn === tagLabel) { + if (M.literal === tagLabel) { loadResults(parseOneTextNode(i, { - closerFn: matchAny(tagEnding) + stopAt: [tagEnding] })); if (!M) throw endOfInput(i, `reading label`); -- cgit 1.3.0-6-gf8a5 From 37a4c3e3d92d10b82963018c1df51608096fafd0 Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Thu, 15 Apr 2021 13:09:33 -0300 Subject: no unnecessary objects in returns (cur: 46k/sec) --- upd8.js | 100 +++++++++++++++++++++++++++++++--------------------------------- 1 file changed, 48 insertions(+), 52 deletions(-) diff --git a/upd8.js b/upd8.js index baf73c0..3bb2e01 100755 --- a/upd8.js +++ b/upd8.js @@ -976,22 +976,24 @@ const replacerSpec = { const makeError = (i, message) => makeNode(i, 'error', {message}); const endOfInput = (i, comment) => makeError(i, `Unexpected end of input (${comment}).`); + let stopped, + stop_iMatch, + stop_iParse, + stop_literal; + const parseOneTextNode = function(i, opts) { - const { 
nodes, i: newI, ...rest } = parseNodes(i, { + const nodes = parseNodes(i, { ...opts, textOnly: true }); - return { - node: ( - nodes.length === 0 ? null : - nodes.length === 1 ? nodes[0] : - makeNode(i, 'text', { - string: nodes.map(node => node.string).join(' ') - })), - i, - ...rest - }; + return ( + nodes.length === 0 ? null : + nodes.length === 1 ? nodes[0] : + makeNode(i, 'text', { + string: nodes.map(node => node.string).join(' ') + }) + ); }; const parseNodes = function(i, { @@ -1036,10 +1038,11 @@ const replacerSpec = { for (const literal of stopAt) { if (input.slice(i, i + literal.length) === literal) { pushTextNode(); - return { - nodes, i, - stoppedAt: {iMatch: i, iParse: i + literal.length, literal} - }; + stopped = true; + stop_iMatch = i; + stop_iParse = i + literal.length; + stop_literal = literal; + return nodes; } } } @@ -1052,25 +1055,18 @@ const replacerSpec = { const iTag = i; i += tagBeginning.length; - let P, // parse - N, // node - M; // match (stopped at) - const loadResults = result => { - P = result; - N = P.node || P.nodes; - M = P.stoppedAt; - }; + let N; // Replacer key (or value) - loadResults(parseOneTextNode(i, { + N = parseOneTextNode(i, { stopAt: [tagReplacerValue, tagArgument, tagLabel, tagEnding] - })); + }); - if (!M) throw endOfInput(i, `reading replacer key`); + if (!stopped) throw endOfInput(i, `reading replacer key`); if (!N) { - switch (M.literal) { + switch (stop_literal) { case tagReplacerValue: case tagArgument: case tagLabel: @@ -1081,22 +1077,22 @@ const replacerSpec = { } const replacerFirst = N; - i = M.iParse; + i = stop_iParse; // Replacer value (if explicit) let replacerSecond; - if (M.literal === tagReplacerValue) { - loadResults(parseNodes(i, { + if (stop_literal === tagReplacerValue) { + N = parseNodes(i, { stopAt: [tagArgument, tagLabel, tagEnding] - })); + }); - if (!M) throw endOfInput(i, `reading replacer value`); - if (!N) throw makeError(i, `Expected content (replacer value).`); + if (!stopped) throw 
endOfInput(i, `reading replacer value`); + if (!N.length) throw makeError(i, `Expected content (replacer value).`); replacerSecond = N; - i = M.iParse + i = stop_iParse } // Assign first & second to replacer key/value @@ -1113,47 +1109,47 @@ const replacerSpec = { const args = []; - while (M.literal === tagArgument) { - loadResults(parseOneTextNode(i, { + while (stop_literal === tagArgument) { + N = parseOneTextNode(i, { stopAt: [tagArgumentValue, tagArgument, tagLabel, tagEnding] - })); + }); - if (!M) throw endOfInput(i, `reading argument key`); + if (!stopped) throw endOfInput(i, `reading argument key`); - if (M.literal !== tagArgumentValue) + if (stop_literal !== tagArgumentValue) throw makeError(i, `Expected ${tagArgumentValue.literal} (tag argument).`); if (!N) throw makeError(i, `Expected text (argument key).`); const key = N; - i = M.iParse; + i = stop_iParse; - loadResults(parseNodes(i, { + N = parseNodes(i, { stopAt: [tagArgument, tagLabel, tagEnding] - })); + }); - if (!M) throw endOfInput(i, `reading argument value`); - if (!N) throw makeError(i, `Expected content (argument value).`); + if (!stopped) throw endOfInput(i, `reading argument value`); + if (!N.length) throw makeError(i, `Expected content (argument value).`); const value = N; - i = M.iParse; + i = stop_iParse; args.push({key, value}); } let label; - if (M.literal === tagLabel) { - loadResults(parseOneTextNode(i, { + if (stop_literal === tagLabel) { + N = parseOneTextNode(i, { stopAt: [tagEnding] - })); + }); - if (!M) throw endOfInput(i, `reading label`); + if (!stopped) throw endOfInput(i, `reading label`); if (!N) throw makeError(i, `Expected text (label).`); label = N; - i = M.iParse; + i = stop_iParse; } nodes.push(makeNode(iTag, 'tag', {replacerKey, replacerValue, args, label})); @@ -1166,11 +1162,11 @@ const replacerSpec = { } pushTextNode(); - return {nodes, i}; + return nodes; }; try { - return parseNodes(0).nodes; + return parseNodes(0); } catch (errorNode) { if (errorNode.type !== 
'error') { throw errorNode; -- cgit 1.3.0-6-gf8a5 From 802b1ed64b729f95b77d48f9f024141e95d296a5 Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Thu, 15 Apr 2021 13:16:40 -0300 Subject: don't recreate reused functions (cur: 48k/sec) This commit 8est viewed with whitespace changes hidden! --- upd8.js | 307 ++++++++++++++++++++++++++++++++-------------------------------- 1 file changed, 155 insertions(+), 152 deletions(-) diff --git a/upd8.js b/upd8.js index 3bb2e01..7b66215 100755 --- a/upd8.js +++ b/upd8.js @@ -971,202 +971,205 @@ const replacerSpec = { \]\] // Closing ]]. `, 'g'); - transformInline.parse = function(input) { - const makeNode = (i, type, props) => ({i, type, ...props}); - const makeError = (i, message) => makeNode(i, 'error', {message}); - const endOfInput = (i, comment) => makeError(i, `Unexpected end of input (${comment}).`); - - let stopped, - stop_iMatch, - stop_iParse, - stop_literal; - - const parseOneTextNode = function(i, opts) { - const nodes = parseNodes(i, { - ...opts, - textOnly: true - }); + // Syntax literals. + const tagBeginning = '[['; + const tagEnding = ']]'; + const tagReplacerValue = ':'; + const tagArgument = '*'; + const tagArgumentValue = '='; + const tagLabel = '|'; + + const makeNode = (i, type, props) => ({i, type, ...props}); + const makeError = (i, message) => makeNode(i, 'error', {message}); + const endOfInput = (i, comment) => makeError(i, `Unexpected end of input (${comment}).`); + + // These are 8asically stored on the glo8al scope, which might seem odd + // for a recursive function, 8ut the values are only ever used immediately + // after they're set. + let stopped, + stop_iMatch, + stop_iParse, + stop_literal; + + const parseOneTextNode = function(input, i, opts) { + const nodes = parseNodes(input, i, { + ...opts, + textOnly: true + }); - return ( - nodes.length === 0 ? null : - nodes.length === 1 ? 
nodes[0] : - makeNode(i, 'text', { - string: nodes.map(node => node.string).join(' ') - }) - ); - }; + return ( + nodes.length === 0 ? null : + nodes.length === 1 ? nodes[0] : + makeNode(i, 'text', { + string: nodes.map(node => node.string).join(' ') + }) + ); + }; - const parseNodes = function(i, { - stopAt = null, - textOnly = false - } = {}) { - let nodes = []; - let escapeNext = false; - let string = ''; - let iString = 0; - - // Syntax literals. - const tagBeginning = '[['; - const tagEnding = ']]'; - const tagReplacerValue = ':'; - const tagArgument = '*'; - const tagArgumentValue = '='; - const tagLabel = '|'; - - const pushNode = (...args) => nodes.push(makeNode(...args)); - const pushTextNode = () => { - if (string.length) { - pushNode(iString, 'text', {string}); - string = ''; - } - }; + const parseNodes = function(input, i, { + stopAt = null, + textOnly = false + } = {}) { + let nodes = []; + let escapeNext = false; + let string = ''; + let iString = 0; + + const pushNode = (...args) => nodes.push(makeNode(...args)); + const pushTextNode = () => { + if (string.length) { + pushNode(iString, 'text', {string}); + string = ''; + } + }; - while (i < input.length) { - if (escapeNext) { - string += input[i]; - i++; - continue; - } + while (i < input.length) { + if (escapeNext) { + string += input[i]; + i++; + continue; + } - if (input[i] === '\\') { - escapeNext = true; - i++; - continue; - } + if (input[i] === '\\') { + escapeNext = true; + i++; + continue; + } - if (stopAt) { - for (const literal of stopAt) { - if (input.slice(i, i + literal.length) === literal) { - pushTextNode(); - stopped = true; - stop_iMatch = i; - stop_iParse = i + literal.length; - stop_literal = literal; - return nodes; - } + if (stopAt) { + for (const literal of stopAt) { + if (input.slice(i, i + literal.length) === literal) { + pushTextNode(); + stopped = true; + stop_iMatch = i; + stop_iParse = i + literal.length; + stop_literal = literal; + return nodes; } } + } - if 
(input.slice(i, i + tagBeginning.length) === tagBeginning) { - if (textOnly) - throw makeError(i, `Unexpected [[tag]] - expected only text here.`); + if (input.slice(i, i + tagBeginning.length) === tagBeginning) { + if (textOnly) + throw makeError(i, `Unexpected [[tag]] - expected only text here.`); - pushTextNode(); - const iTag = i; - i += tagBeginning.length; + pushTextNode(); + const iTag = i; + i += tagBeginning.length; - let N; + let N; - // Replacer key (or value) + // Replacer key (or value) - N = parseOneTextNode(i, { - stopAt: [tagReplacerValue, tagArgument, tagLabel, tagEnding] - }); + N = parseOneTextNode(input, i, { + stopAt: [tagReplacerValue, tagArgument, tagLabel, tagEnding] + }); - if (!stopped) throw endOfInput(i, `reading replacer key`); + if (!stopped) throw endOfInput(i, `reading replacer key`); - if (!N) { - switch (stop_literal) { - case tagReplacerValue: - case tagArgument: - case tagLabel: - throw makeError(i, `Expected text (replacer key).`); - case tagEnding: - throw makeError(i, `Expected text (replacer key/value).`); - } + if (!N) { + switch (stop_literal) { + case tagReplacerValue: + case tagArgument: + case tagLabel: + throw makeError(i, `Expected text (replacer key).`); + case tagEnding: + throw makeError(i, `Expected text (replacer key/value).`); } + } - const replacerFirst = N; - i = stop_iParse; - - // Replacer value (if explicit) - - let replacerSecond; + const replacerFirst = N; + i = stop_iParse; - if (stop_literal === tagReplacerValue) { - N = parseNodes(i, { - stopAt: [tagArgument, tagLabel, tagEnding] - }); + // Replacer value (if explicit) - if (!stopped) throw endOfInput(i, `reading replacer value`); - if (!N.length) throw makeError(i, `Expected content (replacer value).`); + let replacerSecond; - replacerSecond = N; - i = stop_iParse - } + if (stop_literal === tagReplacerValue) { + N = parseNodes(input, i, { + stopAt: [tagArgument, tagLabel, tagEnding] + }); - // Assign first & second to replacer key/value + if (!stopped) 
throw endOfInput(i, `reading replacer value`); + if (!N.length) throw makeError(i, `Expected content (replacer value).`); - // Value is an array of nodes, 8ut key is just one (or null). - // So if we use replacerFirst as the value, we need to stick - // it in an array (on its own). - const [ replacerKey, replacerValue ] = - (replacerSecond - ? [replacerFirst, replacerSecond] - : [null, [replacerFirst]]); + replacerSecond = N; + i = stop_iParse + } - // Arguments + // Assign first & second to replacer key/value - const args = []; + // Value is an array of nodes, 8ut key is just one (or null). + // So if we use replacerFirst as the value, we need to stick + // it in an array (on its own). + const [ replacerKey, replacerValue ] = + (replacerSecond + ? [replacerFirst, replacerSecond] + : [null, [replacerFirst]]); - while (stop_literal === tagArgument) { - N = parseOneTextNode(i, { - stopAt: [tagArgumentValue, tagArgument, tagLabel, tagEnding] - }); + // Arguments - if (!stopped) throw endOfInput(i, `reading argument key`); + const args = []; - if (stop_literal !== tagArgumentValue) - throw makeError(i, `Expected ${tagArgumentValue.literal} (tag argument).`); + while (stop_literal === tagArgument) { + N = parseOneTextNode(input, i, { + stopAt: [tagArgumentValue, tagArgument, tagLabel, tagEnding] + }); - if (!N) - throw makeError(i, `Expected text (argument key).`); + if (!stopped) throw endOfInput(i, `reading argument key`); - const key = N; - i = stop_iParse; + if (stop_literal !== tagArgumentValue) + throw makeError(i, `Expected ${tagArgumentValue.literal} (tag argument).`); - N = parseNodes(i, { - stopAt: [tagArgument, tagLabel, tagEnding] - }); + if (!N) + throw makeError(i, `Expected text (argument key).`); - if (!stopped) throw endOfInput(i, `reading argument value`); - if (!N.length) throw makeError(i, `Expected content (argument value).`); + const key = N; + i = stop_iParse; - const value = N; - i = stop_iParse; + N = parseNodes(input, i, { + stopAt: 
[tagArgument, tagLabel, tagEnding] + }); - args.push({key, value}); - } + if (!stopped) throw endOfInput(i, `reading argument value`); + if (!N.length) throw makeError(i, `Expected content (argument value).`); - let label; + const value = N; + i = stop_iParse; - if (stop_literal === tagLabel) { - N = parseOneTextNode(i, { - stopAt: [tagEnding] - }); + args.push({key, value}); + } - if (!stopped) throw endOfInput(i, `reading label`); - if (!N) throw makeError(i, `Expected text (label).`); + let label; - label = N; - i = stop_iParse; - } + if (stop_literal === tagLabel) { + N = parseOneTextNode(input, i, { + stopAt: [tagEnding] + }); - nodes.push(makeNode(iTag, 'tag', {replacerKey, replacerValue, args, label})); + if (!stopped) throw endOfInput(i, `reading label`); + if (!N) throw makeError(i, `Expected text (label).`); - continue; + label = N; + i = stop_iParse; } - string += input[i]; - i++; + nodes.push(makeNode(iTag, 'tag', {replacerKey, replacerValue, args, label})); + + continue; } - pushTextNode(); - return nodes; - }; + string += input[i]; + i++; + } + pushTextNode(); + return nodes; + }; + + transformInline.parse = function(input) { try { - return parseNodes(0); + return parseNodes(input, 0); } catch (errorNode) { if (errorNode.type !== 'error') { throw errorNode; -- cgit 1.3.0-6-gf8a5 From 5eb59b15d7edbdeeb366b9ee248ae1045a326d8a Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Thu, 15 Apr 2021 13:20:23 -0300 Subject: simplify once-used pushNode call (cur: 48.5k/sec) The idea here is to get rid of unnecessary destructuring of the arguments array. 
--- upd8.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/upd8.js b/upd8.js index 7b66215..b610fa8 100755 --- a/upd8.js +++ b/upd8.js @@ -1015,10 +1015,9 @@ const replacerSpec = { let string = ''; let iString = 0; - const pushNode = (...args) => nodes.push(makeNode(...args)); const pushTextNode = () => { if (string.length) { - pushNode(iString, 'text', {string}); + nodes.push(makeNode(iString, 'text', {string})); string = ''; } }; -- cgit 1.3.0-6-gf8a5 From 8adc9f202a17578102ccbd3c4989757fdb42cd72 Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Thu, 15 Apr 2021 13:22:53 -0300 Subject: store node data in its own property (cur: 51k/sec) Another place to avoid destructuring (actually the spread operator, i.e. effectively an Object.assign call). --- upd8.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/upd8.js b/upd8.js index b610fa8..76a4e59 100755 --- a/upd8.js +++ b/upd8.js @@ -979,7 +979,7 @@ const replacerSpec = { const tagArgumentValue = '='; const tagLabel = '|'; - const makeNode = (i, type, props) => ({i, type, ...props}); + const makeNode = (i, type, data) => ({i, type, data}); const makeError = (i, message) => makeNode(i, 'error', {message}); const endOfInput = (i, comment) => makeError(i, `Unexpected end of input (${comment}).`); -- cgit 1.3.0-6-gf8a5 From ae755a3c27c12f2928aa4e32ac99a1736a286e20 Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Thu, 15 Apr 2021 13:26:57 -0300 Subject: remove objs from fn signatures (cur: 84k/sec) Hoo 8*tch, that's a 8ig improvement! This gets rid of the last spread/destructure, a straggler in parseOneTextNode, and makes the only non-primitive passed to either itself or parseNodes the array of tags to parse, cutting away a 8unch of o8jects which were only getting used the one time. 
--- upd8.js | 32 ++++++++------------------------ 1 file changed, 8 insertions(+), 24 deletions(-) diff --git a/upd8.js b/upd8.js index 76a4e59..3b495b9 100755 --- a/upd8.js +++ b/upd8.js @@ -991,11 +991,8 @@ const replacerSpec = { stop_iParse, stop_literal; - const parseOneTextNode = function(input, i, opts) { - const nodes = parseNodes(input, i, { - ...opts, - textOnly: true - }); + const parseOneTextNode = function(input, i, stopAt) { + const nodes = parseNodes(input, i, stopAt, true); return ( nodes.length === 0 ? null : @@ -1006,10 +1003,7 @@ const replacerSpec = { ); }; - const parseNodes = function(input, i, { - stopAt = null, - textOnly = false - } = {}) { + const parseNodes = function(input, i, stopAt, textOnly) { let nodes = []; let escapeNext = false; let string = ''; @@ -1060,9 +1054,7 @@ const replacerSpec = { // Replacer key (or value) - N = parseOneTextNode(input, i, { - stopAt: [tagReplacerValue, tagArgument, tagLabel, tagEnding] - }); + N = parseOneTextNode(input, i, [tagReplacerValue, tagArgument, tagLabel, tagEnding]); if (!stopped) throw endOfInput(i, `reading replacer key`); @@ -1085,9 +1077,7 @@ const replacerSpec = { let replacerSecond; if (stop_literal === tagReplacerValue) { - N = parseNodes(input, i, { - stopAt: [tagArgument, tagLabel, tagEnding] - }); + N = parseNodes(input, i, [tagArgument, tagLabel, tagEnding]); if (!stopped) throw endOfInput(i, `reading replacer value`); if (!N.length) throw makeError(i, `Expected content (replacer value).`); @@ -1111,9 +1101,7 @@ const replacerSpec = { const args = []; while (stop_literal === tagArgument) { - N = parseOneTextNode(input, i, { - stopAt: [tagArgumentValue, tagArgument, tagLabel, tagEnding] - }); + N = parseOneTextNode(input, i, [tagArgumentValue, tagArgument, tagLabel, tagEnding]); if (!stopped) throw endOfInput(i, `reading argument key`); @@ -1126,9 +1114,7 @@ const replacerSpec = { const key = N; i = stop_iParse; - N = parseNodes(input, i, { - stopAt: [tagArgument, tagLabel, tagEnding] 
- }); + N = parseNodes(input, i, [tagArgument, tagLabel, tagEnding]); if (!stopped) throw endOfInput(i, `reading argument value`); if (!N.length) throw makeError(i, `Expected content (argument value).`); @@ -1142,9 +1128,7 @@ const replacerSpec = { let label; if (stop_literal === tagLabel) { - N = parseOneTextNode(input, i, { - stopAt: [tagEnding] - }); + N = parseOneTextNode(input, i, [tagEnding]); if (!stopped) throw endOfInput(i, `reading label`); if (!N) throw makeError(i, `Expected text (label).`); -- cgit 1.3.0-6-gf8a5 From 8a0fb4488ccb5dc5c799fe44977ca55edadba3cb Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Thu, 15 Apr 2021 13:34:19 -0300 Subject: make data in text nodes string (cur: 84k/sec) This doesn't actually impact the performance at all, 8ut it's going to 8e nicer to work with later. --- upd8.js | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/upd8.js b/upd8.js index 3b495b9..37b72ae 100755 --- a/upd8.js +++ b/upd8.js @@ -997,9 +997,7 @@ const replacerSpec = { return ( nodes.length === 0 ? null : nodes.length === 1 ? nodes[0] : - makeNode(i, 'text', { - string: nodes.map(node => node.string).join(' ') - }) + makeNode(i, 'text', nodes.map(node => node.string).join(' ')) ); }; @@ -1011,7 +1009,7 @@ const replacerSpec = { const pushTextNode = () => { if (string.length) { - nodes.push(makeNode(iString, 'text', {string})); + nodes.push(makeNode(iString, 'text', string)); string = ''; } }; -- cgit 1.3.0-6-gf8a5 From aaef4b0ca3b1432101d1ca84cc7b4898ddee7789 Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Thu, 15 Apr 2021 13:36:58 -0300 Subject: remove void parseOneTextNode logic (cur: 84k/sec) If textOnly is passed, parseNodes will only ever return a single text node, so the other paths were never getting reached. This is only cleanup, though - no significant performance improvement. 
--- upd8.js | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/upd8.js b/upd8.js index 37b72ae..eec04d7 100755 --- a/upd8.js +++ b/upd8.js @@ -992,13 +992,7 @@ const replacerSpec = { stop_literal; const parseOneTextNode = function(input, i, stopAt) { - const nodes = parseNodes(input, i, stopAt, true); - - return ( - nodes.length === 0 ? null : - nodes.length === 1 ? nodes[0] : - makeNode(i, 'text', nodes.map(node => node.string).join(' ')) - ); + return parseNodes(input, i, stopAt, true)[0]; }; const parseNodes = function(input, i, stopAt, textOnly) { -- cgit 1.3.0-6-gf8a5 From 4f8158e579a7092fcfbf8efc944f6608d85f70b6 Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Thu, 15 Apr 2021 13:49:17 -0300 Subject: don't use makeNode (cur: 84k/sec) This actually might 8e 85k/sec 8ut that feels, uh, counter-intuitive? Pretty sure it was just getting optimized out, anyway. --- upd8.js | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/upd8.js b/upd8.js index eec04d7..9e2ed2e 100755 --- a/upd8.js +++ b/upd8.js @@ -979,8 +979,7 @@ const replacerSpec = { const tagArgumentValue = '='; const tagLabel = '|'; - const makeNode = (i, type, data) => ({i, type, data}); - const makeError = (i, message) => makeNode(i, 'error', {message}); + const makeError = (i, message) => ({i, type: 'error', data: {message}}); const endOfInput = (i, comment) => makeError(i, `Unexpected end of input (${comment}).`); // These are 8asically stored on the glo8al scope, which might seem odd @@ -1003,7 +1002,7 @@ const replacerSpec = { const pushTextNode = () => { if (string.length) { - nodes.push(makeNode(iString, 'text', string)); + nodes.push({i: iString, type: 'text', data: string}); string = ''; } }; @@ -1129,7 +1128,7 @@ const replacerSpec = { i = stop_iParse; } - nodes.push(makeNode(iTag, 'tag', {replacerKey, replacerValue, args, label})); + nodes.push({i: iTag, type: 'tag', data: {replacerKey, replacerValue, args, label}}); continue; } -- cgit
1.3.0-6-gf8a5 From e5681aa48b464fc194b4445670e7bf027f314562 Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Thu, 15 Apr 2021 13:50:44 -0300 Subject: parser error throwing/handling bugfixes --- upd8.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/upd8.js b/upd8.js index 9e2ed2e..4555b00 100755 --- a/upd8.js +++ b/upd8.js @@ -1000,6 +1000,8 @@ const replacerSpec = { let string = ''; let iString = 0; + stopped = false; + const pushTextNode = () => { if (string.length) { nodes.push({i: iString, type: 'text', data: string}); @@ -1149,7 +1151,7 @@ const replacerSpec = { throw errorNode; } - const { i, message } = errorNode; + const { i, data: { message } } = errorNode; // TODO: Visual line/surrounding characters presentation! throw new SyntaxError(`Parse error (at pos ${i}): ${message}`); -- cgit 1.3.0-6-gf8a5 From 396491ffd6f4428f72e749057da78bf042bf36b7 Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Thu, 15 Apr 2021 13:51:19 -0300 Subject: run 5s of tests instead of 1 (cur: 87k/sec) Why does this improve efficiency? I do not know. It's entirely possi8le the earlier estim8tions were just off, and this is more accur8te. 
--- upd8.js | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/upd8.js b/upd8.js index 4555b00..7396ac3 100755 --- a/upd8.js +++ b/upd8.js @@ -1175,13 +1175,14 @@ const replacerSpec = { { function test(input) { let n = 0; + const s = 5; const start = Date.now(); - const end = start + 1000; + const end = start + s * 1000; while (Date.now() < end) { transformInline.parse(input); n++; } - console.log(`Ran ${n} times.`); + console.log(`Ran ${Math.round(n / s)} times/sec.`); } test(fixWS` -- cgit 1.3.0-6-gf8a5 From fbfe596c99924431b3e7105ab3c568bbf26f3fef Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Thu, 15 Apr 2021 13:53:23 -0300 Subject: don't global-hoist speedtest definition --- upd8.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/upd8.js b/upd8.js index 7396ac3..b1eb896 100755 --- a/upd8.js +++ b/upd8.js @@ -1173,7 +1173,7 @@ const replacerSpec = { } { - function test(input) { + const test = input => { let n = 0; const s = 5; const start = Date.now(); -- cgit 1.3.0-6-gf8a5 From e3baf8b7e3834b27a6ea6516e43d797570ff4c92 Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Thu, 15 Apr 2021 15:48:03 -0300 Subject: use regexes, avoid parse by letter (cur: 110k/sec) --- upd8-util.js | 8 ++++++ upd8.js | 88 ++++++++++++++++++++++++++++++++++++++---------------------- 2 files changed, 64 insertions(+), 32 deletions(-) diff --git a/upd8-util.js b/upd8-util.js index 30260f8..3293d68 100644 --- a/upd8-util.js +++ b/upd8-util.js @@ -427,3 +427,11 @@ module.exports.makeExtendedRegExp = (inputPatternStr, flags) => { .replace(/(^|[^\\])\s+/g, '$1'); return new RegExp(cleanedPatternStr, flags); }; + + +// Stolen from here: https://stackoverflow.com/a/3561711 +// +// There's a proposal for a native JS function like this, 8ut it's not even +// past stage 1 yet: https://github.com/tc39/proposal-regex-escaping +module.exports.escapeRegex = string => + string.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&'); diff --git a/upd8.js b/upd8.js 
index b1eb896..72031b8 100755 --- a/upd8.js +++ b/upd8.js @@ -111,6 +111,7 @@ const { chunkByProperties, curry, decorateTime, + escapeRegex, filterEmptyLines, joinNoOxford, mapInPlace, @@ -979,6 +980,15 @@ const replacerSpec = { const tagArgumentValue = '='; const tagLabel = '|'; + const R_tagBeginning = escapeRegex(tagBeginning); + const R_tagEnding = escapeRegex(tagEnding); + const R_tagReplacerValue = escapeRegex(tagReplacerValue); + const R_tagArgument = escapeRegex(tagArgument); + const R_tagArgumentValue = escapeRegex(tagArgumentValue); + const R_tagLabel = escapeRegex(tagLabel); + + const regexpCache = {}; + const makeError = (i, message) => ({i, type: 'error', data: {message}}); const endOfInput = (i, comment) => makeError(i, `Unexpected end of input (${comment}).`); @@ -1009,45 +1019,62 @@ const replacerSpec = { } }; + const literalsToMatch = stopAt ? stopAt.concat([R_tagBeginning]) : [R_tagBeginning]; + + // The 8ackslash stuff here is to only match an even (or zero) num8er + // of sequential 'slashes. Even amounts always cancel out! Odd amounts + // don't, which would mean the following literal is 8eing escaped and + // should 8e counted only as part of the current string/text. + // + // Inspired 8y this: https://stackoverflow.com/a/41470813 + const regexpSource = `(? Date: Thu, 15 Apr 2021 15:53:02 -0300 Subject: remove unused original code --- upd8.js | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/upd8.js b/upd8.js index 72031b8..7ec6b55 100755 --- a/upd8.js +++ b/upd8.js @@ -961,17 +961,6 @@ const replacerSpec = { } if (error) process.exit(); - const categoryPart = Object.keys(replacerSpec).join('|'); - // transformInline.regexp = new RegExp(String.raw`(? Date: Thu, 15 Apr 2021 16:00:46 -0300 Subject: simple replaceKey/Value syntax (cur: 110k/sec) Not actually a speed-up, 8ut this is easier to read. Learn it from me: with no other 8enefits, prefer what's legi8le, not what's cool! 
At least in projects where the whole point isn't 8eing cool, anyway. --- upd8.js | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/upd8.js b/upd8.js index 7ec6b55..8d22f64 100755 --- a/upd8.js +++ b/upd8.js @@ -1097,13 +1097,19 @@ const replacerSpec = { // Assign first & second to replacer key/value + let replacerKey, + replacerValue; + // Value is an array of nodes, 8ut key is just one (or null). // So if we use replacerFirst as the value, we need to stick // it in an array (on its own). - const [ replacerKey, replacerValue ] = - (replacerSecond - ? [replacerFirst, replacerSecond] - : [null, [replacerFirst]]); + if (replacerSecond) { + replacerKey = replacerFirst; + replacerValue = replacerSecond; + } else { + replacerKey = null; + replacerValue = [replacerFirst]; + } // Arguments -- cgit 1.3.0-6-gf8a5 From c6a8b367a5a1a243e5112a33aedee9e9147f7a0f Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Fri, 16 Apr 2021 12:43:14 -0300 Subject: fix final text not being added as node --- upd8.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/upd8.js b/upd8.js index 8d22f64..6986ac8 100755 --- a/upd8.js +++ b/upd8.js @@ -1033,6 +1033,9 @@ const replacerSpec = { const match = input.slice(i).match(regexp); if (!match) { + iString = i; + string = input.slice(i, input.length); + pushTextNode(); break; } @@ -1158,7 +1161,6 @@ const replacerSpec = { } } - pushTextNode(); return nodes; }; -- cgit 1.3.0-6-gf8a5 From 32448fb8ff1c5474e25847c0cae397bdf1c6f5ab Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Fri, 16 Apr 2021 13:53:27 -0300 Subject: add hash to tag syntax --- upd8.js | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/upd8.js b/upd8.js index 6986ac8..504cd6d 100755 --- a/upd8.js +++ b/upd8.js @@ -965,6 +965,7 @@ const replacerSpec = { const tagBeginning = '[['; const tagEnding = ']]'; const tagReplacerValue = ':'; + const tagHash = '#'; const tagArgument = '*'; 
const tagArgumentValue = '='; const tagLabel = '|'; @@ -972,6 +973,7 @@ const replacerSpec = { const R_tagBeginning = escapeRegex(tagBeginning); const R_tagEnding = escapeRegex(tagEnding); const R_tagReplacerValue = escapeRegex(tagReplacerValue); + const R_tagHash = escapeRegex(tagHash); const R_tagArgument = escapeRegex(tagArgument); const R_tagArgumentValue = escapeRegex(tagArgumentValue); const R_tagLabel = escapeRegex(tagLabel); @@ -1066,7 +1068,7 @@ const replacerSpec = { // Replacer key (or value) - N = parseOneTextNode(input, i, [R_tagReplacerValue, R_tagArgument, R_tagLabel, R_tagEnding]); + N = parseOneTextNode(input, i, [R_tagReplacerValue, R_tagHash, R_tagArgument, R_tagLabel, R_tagEnding]); if (!stopped) throw endOfInput(i, `reading replacer key`); @@ -1074,8 +1076,9 @@ const replacerSpec = { switch (stop_literal) { case tagReplacerValue: case tagArgument: - case tagLabel: throw makeError(i, `Expected text (replacer key).`); + case tagLabel: + case tagHash: case tagEnding: throw makeError(i, `Expected text (replacer key/value).`); } @@ -1089,7 +1092,7 @@ const replacerSpec = { let replacerSecond; if (stop_literal === tagReplacerValue) { - N = parseNodes(input, i, [R_tagArgument, R_tagLabel, R_tagEnding]); + N = parseNodes(input, i, [R_tagHash, R_tagArgument, R_tagLabel, R_tagEnding]); if (!stopped) throw endOfInput(i, `reading replacer value`); if (!N.length) throw makeError(i, `Expected content (replacer value).`); @@ -1114,6 +1117,22 @@ const replacerSpec = { replacerValue = [replacerFirst]; } + // Hash + + let hash; + + if (stop_literal === tagHash) { + N = parseNodes(input, i, [R_tagArgument, R_tagLabel, R_tagEnding]); + + if (!stopped) throw endOfInput(i, `reading hash`); + + if (!N) + throw makeError(i, `Expected content (hash).`); + + hash = N; + i = stop_iParse; + } + // Arguments const args = []; @@ -1155,7 +1174,7 @@ const replacerSpec = { i = stop_iParse; } - nodes.push({i: iTag, type: 'tag', data: {replacerKey, replacerValue, args, label}}); 
+ nodes.push({i: iTag, type: 'tag', data: {replacerKey, replacerValue, hash, args, label}}); continue; } -- cgit 1.3.0-6-gf8a5 From d1e808ee9585786a28a73b66ed1b20a58b63b80a Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Fri, 16 Apr 2021 13:54:01 -0300 Subject: add end index to node info --- upd8.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/upd8.js b/upd8.js index 504cd6d..1781107 100755 --- a/upd8.js +++ b/upd8.js @@ -1005,7 +1005,7 @@ const replacerSpec = { const pushTextNode = () => { if (string.length) { - nodes.push({i: iString, type: 'text', data: string}); + nodes.push({i: iString, iEnd: i, type: 'text', data: string}); string = ''; } }; @@ -1174,7 +1174,7 @@ const replacerSpec = { i = stop_iParse; } - nodes.push({i: iTag, type: 'tag', data: {replacerKey, replacerValue, hash, args, label}}); + nodes.push({i: iTag, iEnd: i, type: 'tag', data: {replacerKey, replacerValue, hash, args, label}}); continue; } -- cgit 1.3.0-6-gf8a5 From 094e5a621b80b21bbc03937e2b40e1c32df93315 Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Fri, 16 Apr 2021 13:54:55 -0300 Subject: match hash/replacerValue only without preceding WS --- upd8.js | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/upd8.js b/upd8.js index 1781107..d2983e7 100755 --- a/upd8.js +++ b/upd8.js @@ -970,13 +970,30 @@ const replacerSpec = { const tagArgumentValue = '='; const tagLabel = '|'; - const R_tagBeginning = escapeRegex(tagBeginning); - const R_tagEnding = escapeRegex(tagEnding); - const R_tagReplacerValue = escapeRegex(tagReplacerValue); - const R_tagHash = escapeRegex(tagHash); - const R_tagArgument = escapeRegex(tagArgument); - const R_tagArgumentValue = escapeRegex(tagArgumentValue); - const R_tagLabel = escapeRegex(tagLabel); + const noPrecedingWhitespace = '(? 
Date: Fri, 16 Apr 2021 13:55:51 -0300 Subject: show line and column position in parse errors --- upd8.js | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/upd8.js b/upd8.js index d2983e7..17b3f28 100755 --- a/upd8.js +++ b/upd8.js @@ -1210,8 +1210,29 @@ const replacerSpec = { const { i, data: { message } } = errorNode; - // TODO: Visual line/surrounding characters presentation! - throw new SyntaxError(`Parse error (at pos ${i}): ${message}`); + let lineStart = input.slice(0, i).lastIndexOf('\n'); + if (lineStart >= 0) { + lineStart += 1; + } else { + lineStart = 0; + } + + let lineEnd = input.slice(i).indexOf('\n'); + if (lineEnd >= 0) { + lineEnd += i; + } else { + lineEnd = input.length; + } + + const line = input.slice(lineStart, lineEnd); + + const cursor = i - lineStart; + + throw new SyntaxError(fixWS` + Parse error (at pos ${i}): ${message} + ${line} + ${'-'.repeat(cursor) + '^'} + `); } }; } -- cgit 1.3.0-6-gf8a5 From 211edd269329c7d1ca73b6806030282b07e989ed Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Fri, 16 Apr 2021 13:56:50 -0300 Subject: connect parser to updated replacer & rest of code! 
--- upd8.js | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 69 insertions(+), 19 deletions(-) diff --git a/upd8.js b/upd8.js index 17b3f28..2be0eaa 100755 --- a/upd8.js +++ b/upd8.js @@ -879,8 +879,8 @@ const replacerSpec = { 'flash': { search: 'flash', link: 'flash', - transformName(name, search, offset, text) { - const nextCharacter = text[offset + search.length]; + transformName(name, node, input) { + const nextCharacter = input[node.iEnd]; const lastCharacter = name[name.length - 1]; if ( ![' ', '\n', '<'].includes(nextCharacter) && @@ -1237,6 +1237,7 @@ const replacerSpec = { }; } +/* { const show = input => process.stdout.write(`-- ${input}\n` + util.inspect( transformInline.parse(input), @@ -1269,11 +1270,22 @@ const replacerSpec = { `); process.exit(); } +*/ + +{ + const evaluateTag = function(node, opts) { + const { input, strings, to } = opts; -function transformInline(text, {strings, to}) { - return text.replace(transformInline.regexp, (match, _1, category, ref, hash, _2, enteredName, offset) => { - if (!category) { - category = 'track'; + const source = input.slice(node.i, node.iEnd); + + const replacerKey = node.data.replacerKey?.data || 'track'; + const replacerValue = transformNodes(node.data.replacerValue, opts); + const hash = node.data.hash && transformNodes(node.data.hash, opts); + const enteredLabel = node.data.label && transformNode(node.data.label, opts); + + if (!replacerSpec[replacerKey]) { + logWarn`The link ${source} has an invalid replacer key!`; + return source; } const { @@ -1282,28 +1294,28 @@ function transformInline(text, {strings, to}) { value: valueFn, html: htmlFn, transformName - } = replacerSpec[category]; + } = replacerSpec[replacerKey]; const value = ( - valueFn ? valueFn(ref) : - searchKey ? search[searchKey](ref) : + valueFn ? valueFn(replacerValue) : + searchKey ? 
search[searchKey](replacerValue) : { - directory: ref.replace(category + ':', ''), + directory: replacerValue, name: null }); if (!value) { - logWarn`The link ${match} does not match anything!`; - return match; + logWarn`The link ${search} does not match anything!`; + return search; } - const label = (enteredName - || transformName && transformName(value.name, match, offset, text) + const label = (enteredLabel + || transformName && transformName(value.name, node, input) || value.name); if (!valueFn && !label) { - logWarn`The link ${match} requires a label be entered!`; - return match; + logWarn`The link ${search} requires a label be entered!`; + return search; } const fn = (htmlFn @@ -1313,10 +1325,48 @@ function transformInline(text, {strings, to}) { try { return fn(value, {text: label, hash, strings, to}); } catch (error) { - logError`The link ${match} failed to be processed: ${error}`; - return match; + logError`The link ${source} failed to be processed: ${error}`; + return source; + } + }; + + const transformNode = function(node, opts) { + if (!node) { + throw new Error('Expected a node!'); + } + + if (Array.isArray(node)) { + throw new Error('Got an array - use transformNodes here!'); + } + + switch (node.type) { + case 'text': + return node.data; + case 'tag': + return evaluateTag(node, opts); + default: + throw new Error(`Unknown node type ${node.type}`); + } + }; + + const transformNodes = function(nodes, opts) { + if (!nodes || !Array.isArray(nodes)) { + throw new Error(`Expected an array of nodes! 
Got: ${nodes}`); } - }).replaceAll(String.raw`\[[`, '[['); + + return nodes.map(node => transformNode(node, opts)).join(''); + }; + + Object.assign(transformInline, { + evaluateTag, + transformNode, + transformNodes + }); +} + +function transformInline(input, {strings, to}) { + const nodes = transformInline.parse(input); + return transformInline.transformNodes(nodes, {strings, to, input}); } function parseAttributes(string, {to}) { -- cgit 1.3.0-6-gf8a5 From 40f6c3b14aacdb96e1b20b4c2e3e1365ccd372c4 Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Mon, 19 Apr 2021 11:55:03 -0300 Subject: fix incorrect iEnd values on text nodes The iEnd value (which marks the position a node's length ends at in input text) is only used for tag nodes at the moment, 8ut it's still nice to have the right values everywhere, in case we do use it for something else later (and to just have code that functions like we want, lol). --- upd8.js | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/upd8.js b/upd8.js index 2be0eaa..de6af97 100755 --- a/upd8.js +++ b/upd8.js @@ -1021,6 +1021,7 @@ const replacerSpec = { stopped = false; const pushTextNode = () => { + string = input.slice(iString, i); if (string.length) { nodes.push({i: iString, iEnd: i, type: 'text', data: string}); string = ''; @@ -1053,7 +1054,7 @@ const replacerSpec = { if (!match) { iString = i; - string = input.slice(i, input.length); + i = input.length; pushTextNode(); break; } @@ -1062,17 +1063,17 @@ const replacerSpec = { const closestMatchIndex = i + match.index; iString = i; - string = input.slice(i, closestMatchIndex); + i = closestMatchIndex; pushTextNode(); - i = closestMatchIndex + closestMatch.length; + i += closestMatch.length; if (closestMatch !== tagBeginning) { stopped = true; stop_iMatch = closestMatchIndex; stop_iParse = i; stop_literal = closestMatch; - return nodes; + break; } if (closestMatch === tagBeginning) { -- cgit 1.3.0-6-gf8a5 From e7bff2495c2ce2b7c1dd01061616bf9e067f469c 
Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Mon, 19 Apr 2021 11:59:47 -0300 Subject: move textOnly tag test before advancing past "[[" This makes syntax error messages a little nicer, i.e. hello there [[album:[[lol]]]] --------------------^ instead of hello there [[album:[[lol]]]] ----------------------^ --- upd8.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/upd8.js b/upd8.js index de6af97..e425cb7 100755 --- a/upd8.js +++ b/upd8.js @@ -1066,6 +1066,9 @@ const replacerSpec = { i = closestMatchIndex; pushTextNode(); + if (textOnly && closestMatch === tagBeginning) + throw makeError(i, `Unexpected [[tag]] - expected only text here.`); + i += closestMatch.length; if (closestMatch !== tagBeginning) { @@ -1077,9 +1080,6 @@ const replacerSpec = { } if (closestMatch === tagBeginning) { - if (textOnly) - throw makeError(i, `Unexpected [[tag]] - expected only text here.`); - const iTag = closestMatchIndex; let N; -- cgit 1.3.0-6-gf8a5 From 503933ff15ab9cace4b71814ca2accb60572b707 Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Mon, 19 Apr 2021 12:32:16 -0300 Subject: trim whitespace surrounding first/last text nodes --- upd8.js | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/upd8.js b/upd8.js index e425cb7..df6b5c6 100755 --- a/upd8.js +++ b/upd8.js @@ -1020,8 +1020,15 @@ const replacerSpec = { stopped = false; - const pushTextNode = () => { + const pushTextNode = (isLast) => { string = input.slice(iString, i); + + // If this is the last text node 8efore stopping (at a stopAt match + // or the end of the input), trim off whitespace at the end. + if (isLast) { + string = string.trimEnd(); + } + if (string.length) { nodes.push({i: iString, iEnd: i, type: 'text', data: string}); string = ''; @@ -1049,29 +1056,46 @@ const replacerSpec = { regexpCache[regexpSource] = regexp; } + // Skip whitespace at the start of parsing. 
This is run every time + // parseNodes is called (and thus parseOneTextNode too), so spaces + // at the start of syntax elements will always 8e skipped. We don't + // skip whitespace that shows up inside content (i.e. once we start + // parsing below), though! + const whitespaceOffset = input.slice(i).search(/[^\s]/); + + // If the string is all whitespace, that's just zero content, so + // return the empty nodes array. + if (whitespaceOffset === -1) { + return nodes; + } + + i += whitespaceOffset; + while (i < input.length) { const match = input.slice(i).match(regexp); if (!match) { iString = i; i = input.length; - pushTextNode(); + pushTextNode(true); break; } const closestMatch = match[0]; const closestMatchIndex = i + match.index; - iString = i; - i = closestMatchIndex; - pushTextNode(); - if (textOnly && closestMatch === tagBeginning) throw makeError(i, `Unexpected [[tag]] - expected only text here.`); + const stopHere = (closestMatch !== tagBeginning); + + iString = i; + i = closestMatchIndex; + pushTextNode(stopHere); + i += closestMatch.length; - if (closestMatch !== tagBeginning) { + if (stopHere) { stopped = true; stop_iMatch = closestMatchIndex; stop_iParse = i; -- cgit 1.3.0-6-gf8a5 From 405069fdd35849d4fede8183b14918e060d7b78c Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Mon, 19 Apr 2021 12:34:05 -0300 Subject: transform replacer arguments --- upd8.js | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/upd8.js b/upd8.js index df6b5c6..10b87cc 100755 --- a/upd8.js +++ b/upd8.js @@ -1304,9 +1304,6 @@ const replacerSpec = { const source = input.slice(node.i, node.iEnd); const replacerKey = node.data.replacerKey?.data || 'track'; - const replacerValue = transformNodes(node.data.replacerValue, opts); - const hash = node.data.hash && transformNodes(node.data.hash, opts); - const enteredLabel = node.data.label && transformNode(node.data.label, opts); if (!replacerSpec[replacerKey]) { logWarn`The link ${source} has an 
invalid replacer key!`; @@ -1321,6 +1318,8 @@ const replacerSpec = { transformName } = replacerSpec[replacerKey]; + const replacerValue = transformNodes(node.data.replacerValue, opts); + const value = ( valueFn ? valueFn(replacerValue) : searchKey ? search[searchKey](replacerValue) : @@ -1334,6 +1333,8 @@ const replacerSpec = { return search; } + const enteredLabel = node.data.label && transformNode(node.data.label, opts); + const label = (enteredLabel || transformName && transformName(value.name, node, input) || value.name); @@ -1343,12 +1344,20 @@ const replacerSpec = { return search; } + const hash = node.data.hash && transformNodes(node.data.hash, opts); + + const args = node.data.args && Object.fromEntries(node.data.args.map( + ({ key, value }) => [ + transformNode(key, opts), + transformNodes(value, opts) + ])); + const fn = (htmlFn ? htmlFn : strings.link[linkKey]); try { - return fn(value, {text: label, hash, strings, to}); + return fn(value, {text: label, hash, args, strings, to}); } catch (error) { logError`The link ${source} failed to be processed: ${error}`; return source; -- cgit 1.3.0-6-gf8a5