diff options
author | (quasar) nebula <towerofnix@gmail.com> | 2021-05-07 15:31:48 -0300 |
---|---|---|
committer | (quasar) nebula <towerofnix@gmail.com> | 2021-05-07 15:31:48 -0300 |
commit | cfb940ba021b1974c27b07dd0008b3bf065dad1e (patch) | |
tree | 8516adc329921f7ce1952df3d08e367f20400f98 | |
parent | 5522bb97469e50f7762259d310d9db246ef8ce4a (diff) | |
parent | 405069fdd35849d4fede8183b14918e060d7b78c (diff) |
Merge branch 'tf-parser' into staging
-rwxr-xr-x | src/upd8.js | 439 | ||||
-rw-r--r-- | src/util/sugar.js | 8 |
2 files changed, 425 insertions, 22 deletions
// This merge replaces the old regexp-based transformInline() in src/upd8.js
// with a small recursive parser plus a node evaluator, and adds escapeRegex()
// to src/util/sugar.js.
//
// Parts of the same change that touch definitions only partly visible in the
// diff, kept here as notes rather than code:
//   - `escapeRegex` joins the names imported into src/upd8.js.
//   - replacerSpec.flash.transformName becomes (name, node, input) and reads
//     the character following the tag with input[node.iEnd].
//   - replacerSpec gains a 'string' entry:
//       {search: null, value: ref => ref,
//        html: (ref, {strings, args}) => strings(ref, args)}

// ---------------------------------------------------------------------------
// src/util/sugar.js
// ---------------------------------------------------------------------------

/**
 * Resolves after the given number of milliseconds.
 *
 * @param {number} ms - delay passed to setTimeout.
 * @returns {Promise<void>}
 */
export function delay(ms) {
  return new Promise(res => setTimeout(res, ms));
}

/**
 * Escapes every regular expression metacharacter in `string` so the result
 * can be embedded in a RegExp source and match the text literally.
 *
 * Adapted from https://stackoverflow.com/a/3561711 - there is a proposal for
 * a native equivalent: https://github.com/tc39/proposal-regex-escaping
 *
 * @param {string} string - literal text.
 * @returns {string} the text with metacharacters backslash-escaped.
 */
export function escapeRegex(string) {
  return string.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&');
}

// ---------------------------------------------------------------------------
// src/upd8.js - inline tag parser
// ---------------------------------------------------------------------------

{
  // Syntax literals.
  const tagBeginning = '[[';
  const tagEnding = ']]';
  const tagReplacerValue = ':';
  const tagHash = '#';
  const tagArgument = '*';
  const tagArgumentValue = '=';
  const tagLabel = '|';

  // Regex fragment: the literal that follows must not come right after
  // whitespace.
  const noPrecedingWhitespace = '(?<!\\s)';

  // Regex sources for each literal (R_ prefix = regex source string).
  const R_tagBeginning = escapeRegex(tagBeginning);
  const R_tagEnding = escapeRegex(tagEnding);
  const R_tagReplacerValue = noPrecedingWhitespace + escapeRegex(tagReplacerValue);
  const R_tagHash = noPrecedingWhitespace + escapeRegex(tagHash);
  const R_tagArgument = escapeRegex(tagArgument);
  const R_tagArgumentValue = escapeRegex(tagArgumentValue);
  const R_tagLabel = escapeRegex(tagLabel);

  // Compiled regular expressions, keyed by their source string.
  const regexpCache = new Map();

  // Parse errors are thrown as plain {i, type: 'error', data} objects and
  // converted into a SyntaxError by transformInline.parse.
  const makeError = (i, message) => ({i, type: 'error', data: {message}});
  const endOfInput = (i, comment) => makeError(i, `Unexpected end of input (${comment}).`);

  // Results of the most recent parseNodes call. They are shared across the
  // recursion, which is fine because each caller reads them immediately after
  // the call that set them.
  let stopped;
  let stop_iMatch;
  let stop_iParse;
  let stop_literal;

  // Parses a single text node (tags are not allowed). Returns the node, or
  // undefined when there is no text before the stop literal.
  const parseOneTextNode = function(input, i, stopAt) {
    return parseNodes(input, i, stopAt, true)[0];
  };

  /**
   * Parses text and [[tag]] nodes from `input`, starting at index `i`.
   *
   * @param {string} input - the complete input string.
   * @param {number} i - index to start parsing at.
   * @param {string[]} [stopAt] - regex sources of literals which end parsing.
   *   When omitted, parsing runs to the end of the input.
   * @param {boolean} [textOnly] - throw if a tag beginning is encountered.
   * @returns {object[]} nodes of shape {i, iEnd, type, data}. After the call,
   *   `stopped` says whether a stopAt literal was reached; if so,
   *   `stop_literal` is its text and `stop_iParse` the index just past it.
   * @throws {{i: number, type: 'error', data: {message: string}}}
   */
  const parseNodes = function(input, i, stopAt, textOnly) {
    const nodes = [];
    let iString = 0;

    stopped = false;

    const pushTextNode = (isLast) => {
      let string = input.slice(iString, i);

      // If this is the last text node before stopping (at a stopAt match
      // or the end of the input), trim off whitespace at the end.
      if (isLast) {
        string = string.trimEnd();
      }

      if (string.length) {
        nodes.push({i: iString, iEnd: i, type: 'text', data: string});
      }
    };

    const literalsToMatch = stopAt ? stopAt.concat([R_tagBeginning]) : [R_tagBeginning];

    // The backslash part only lets a literal match after an even (or zero)
    // number of sequential backslashes. Even amounts cancel out; an odd
    // amount means the literal itself is escaped and belongs to the text.
    //
    // Inspired by this: https://stackoverflow.com/a/41470813
    const regexpSource = `(?<!\\\\)(?:\\\\{2})*(${literalsToMatch.join('|')})`;

    // Only a handful of distinct expressions are ever needed, so generate
    // them on demand and reuse them.
    let regexp = regexpCache.get(regexpSource);
    if (!regexp) {
      regexp = new RegExp(regexpSource);
      regexpCache.set(regexpSource, regexp);
    }

    // Skip whitespace at the start of parsing. This happens on every call
    // (so for parseOneTextNode too), which means spaces at the start of any
    // syntax element are skipped. Whitespace inside content is kept.
    const whitespaceOffset = input.slice(i).search(/[^\s]/);

    // All whitespace means zero content: return the empty nodes array.
    if (whitespaceOffset === -1) {
      return nodes;
    }

    i += whitespaceOffset;

    while (i < input.length) {
      const match = input.slice(i).match(regexp);

      if (!match) {
        iString = i;
        i = input.length;
        pushTextNode(true);
        break;
      }

      // match[0] also contains any escaped-backslash pairs consumed in front
      // of the literal, so take the literal from the capture group and move
      // the index past those backslashes (they stay part of the text).
      const closestMatch = match[1];
      const closestMatchIndex = i + match.index + (match[0].length - closestMatch.length);

      if (textOnly && closestMatch === tagBeginning)
        throw makeError(i, `Unexpected [[tag]] - expected only text here.`);

      const stopHere = (closestMatch !== tagBeginning);

      iString = i;
      i = closestMatchIndex;
      pushTextNode(stopHere);

      i += closestMatch.length;

      if (stopHere) {
        stopped = true;
        stop_iMatch = closestMatchIndex;
        stop_iParse = i;
        stop_literal = closestMatch;
        break;
      }

      // Not a stop literal, so this is the beginning of a tag.
      const iTag = closestMatchIndex;

      let N;

      // Replacer key (or value)

      N = parseOneTextNode(input, i, [R_tagReplacerValue, R_tagHash, R_tagArgument, R_tagLabel, R_tagEnding]);

      if (!stopped) throw endOfInput(i, `reading replacer key`);

      if (!N) {
        switch (stop_literal) {
          case tagReplacerValue:
          case tagArgument:
            throw makeError(i, `Expected text (replacer key).`);
          case tagLabel:
          case tagHash:
          case tagEnding:
            throw makeError(i, `Expected text (replacer key/value).`);
        }
      }

      const replacerFirst = N;
      i = stop_iParse;

      // Replacer value (if explicit)

      let replacerSecond;

      if (stop_literal === tagReplacerValue) {
        N = parseNodes(input, i, [R_tagHash, R_tagArgument, R_tagLabel, R_tagEnding]);

        if (!stopped) throw endOfInput(i, `reading replacer value`);
        if (!N.length) throw makeError(i, `Expected content (replacer value).`);

        replacerSecond = N;
        i = stop_iParse;
      }

      // Assign first & second to replacer key/value

      let replacerKey;
      let replacerValue;

      // Value is an array of nodes, but key is just one (or null).
      // So if we use replacerFirst as the value, we need to stick
      // it in an array (on its own).
      if (replacerSecond) {
        replacerKey = replacerFirst;
        replacerValue = replacerSecond;
      } else {
        replacerKey = null;
        replacerValue = [replacerFirst];
      }

      // Hash

      let hash;

      if (stop_literal === tagHash) {
        N = parseNodes(input, i, [R_tagArgument, R_tagLabel, R_tagEnding]);

        if (!stopped) throw endOfInput(i, `reading hash`);

        // parseNodes always returns an array, so test its length.
        if (!N.length)
          throw makeError(i, `Expected content (hash).`);

        hash = N;
        i = stop_iParse;
      }

      // Arguments

      const args = [];

      while (stop_literal === tagArgument) {
        N = parseOneTextNode(input, i, [R_tagArgumentValue, R_tagArgument, R_tagLabel, R_tagEnding]);

        if (!stopped) throw endOfInput(i, `reading argument key`);

        if (stop_literal !== tagArgumentValue)
          throw makeError(i, `Expected ${tagArgumentValue} (tag argument).`);

        if (!N)
          throw makeError(i, `Expected text (argument key).`);

        const key = N;
        i = stop_iParse;

        N = parseNodes(input, i, [R_tagArgument, R_tagLabel, R_tagEnding]);

        if (!stopped) throw endOfInput(i, `reading argument value`);
        if (!N.length) throw makeError(i, `Expected content (argument value).`);

        const value = N;
        i = stop_iParse;

        args.push({key, value});
      }

      // Label

      let label;

      if (stop_literal === tagLabel) {
        N = parseOneTextNode(input, i, [R_tagEnding]);

        if (!stopped) throw endOfInput(i, `reading label`);
        if (!N) throw makeError(i, `Expected text (label).`);

        label = N;
        i = stop_iParse;
      }

      nodes.push({i: iTag, iEnd: i, type: 'tag', data: {replacerKey, replacerValue, hash, args, label}});

      // The nested calls above left `stopped` true for the tag's own ending.
      // Reset it so that running out of input after this tag is reported to
      // our caller instead of being mistaken for a stop literal.
      stopped = false;
    }

    return nodes;
  };

  /**
   * Parses `input` into an array of text and tag nodes.
   *
   * @param {string} input - text which may contain [[tags]].
   * @returns {object[]} the parsed nodes.
   * @throws {SyntaxError} on malformed input; the message shows the position,
   *   the offending line and a cursor pointing at the error column.
   */
  transformInline.parse = function(input) {
    try {
      return parseNodes(input, 0);
    } catch (errorNode) {
      // Anything that isn't one of our own error nodes is a real exception.
      if (errorNode.type !== 'error') {
        throw errorNode;
      }

      const { i, data: { message } } = errorNode;

      let lineStart = input.slice(0, i).lastIndexOf('\n');
      if (lineStart >= 0) {
        lineStart += 1;
      } else {
        lineStart = 0;
      }

      let lineEnd = input.slice(i).indexOf('\n');
      if (lineEnd >= 0) {
        lineEnd += i;
      } else {
        lineEnd = input.length;
      }

      const line = input.slice(lineStart, lineEnd);

      const cursor = i - lineStart;

      throw new SyntaxError(fixWS`
        Parse error (at pos ${i}): ${message}
        ${line}
        ${'-'.repeat(cursor) + '^'}
      `);
    }
  };
}

/*
{
  const show = input => process.stdout.write(`-- ${input}\n` + util.inspect(
    transformInline.parse(input),
    {
      depth: null,
      colors: true
    }
  ) + '\n\n');

  show(`[[album:are-you-lost|Cristata's new album]]`);
  show(`[[string:content.donate.patreonLine*link=[[external:https://www.patreon.com/qznebula|Patreon]]]]`);
}

{
  const test = input => {
    let n = 0;
    const s = 5;
    const start = Date.now();
    const end = start + s * 1000;
    while (Date.now() < end) {
      transformInline.parse(input);
      n++;
    }
    console.log(`Ran ${Math.round(n / s)} times/sec.`);
  }

  test(fixWS`
    [[string:content.donate.patreonLine*link=[[external:https://www.patreon.com/qznebula|Patreon]]]]
    Hello, world! Wow [[album:the-beans-zone]] is some cool stuff.
  `);
  process.exit();
}
*/

// ---------------------------------------------------------------------------
// src/upd8.js - node evaluation
// ---------------------------------------------------------------------------

{
  /**
   * Turns one parsed tag node into its output string.
   *
   * @param {object} node - a 'tag' node produced by transformInline.parse.
   * @param {object} opts - {input, strings, to}; `input` is the string the
   *   node was parsed from.
   * @returns {string} the result of the replacer's html/link function, or the
   *   tag's original source text when it cannot be resolved (a warning or
   *   error is logged in that case).
   */
  const evaluateTag = function(node, opts) {
    const { input, strings, to } = opts;

    const source = input.slice(node.i, node.iEnd);

    const replacerKey = node.data.replacerKey?.data || 'track';

    // Own-property check, so keys like "toString" don't resolve to something
    // inherited from Object.prototype.
    if (!Object.prototype.hasOwnProperty.call(replacerSpec, replacerKey)) {
      logWarn`The link ${source} has an invalid replacer key!`;
      return source;
    }

    // NOTE(review): the two lines binding searchKey and linkKey fall between
    // the diff's hunks; they are reconstructed from how the names are used
    // below - confirm against the real file.
    const {
      search: searchKey,
      link: linkKey,
      value: valueFn,
      html: htmlFn,
      transformName
    } = replacerSpec[replacerKey];

    const replacerValue = transformNodes(node.data.replacerValue, opts);

    const value = (
      valueFn ? valueFn(replacerValue) :
      searchKey ? search[searchKey](replacerValue) :
      {
        directory: replacerValue,
        name: null
      });

    if (!value) {
      logWarn`The link ${source} does not match anything!`;
      return source;
    }

    const enteredLabel = node.data.label && transformNode(node.data.label, opts);

    const label = (enteredLabel
      || transformName && transformName(value.name, node, input)
      || value.name);

    if (!valueFn && !label) {
      logWarn`The link ${source} requires a label be entered!`;
      return source;
    }

    const hash = node.data.hash && transformNodes(node.data.hash, opts);

    const args = node.data.args && Object.fromEntries(node.data.args.map(
      ({ key, value }) => [
        transformNode(key, opts),
        transformNodes(value, opts)
      ]));

    const fn = (htmlFn
      ? htmlFn
      : strings.link[linkKey]);

    try {
      return fn(value, {text: label, hash, args, strings, to});
    } catch (error) {
      logError`The link ${source} failed to be processed: ${error}`;
      return source;
    }
  };

  /**
   * Converts a single node to a string: text nodes yield their text, tag
   * nodes are evaluated with evaluateTag.
   *
   * @throws {Error} when given no node, an array, or an unknown node type.
   */
  const transformNode = function(node, opts) {
    if (!node) {
      throw new Error('Expected a node!');
    }

    if (Array.isArray(node)) {
      throw new Error('Got an array - use transformNodes here!');
    }

    switch (node.type) {
      case 'text':
        return node.data;
      case 'tag':
        return evaluateTag(node, opts);
      default:
        throw new Error(`Unknown node type ${node.type}`);
    }
  };

  /**
   * Converts an array of nodes to one string by concatenating the result of
   * transformNode for each.
   *
   * @throws {Error} when `nodes` is not an array.
   */
  const transformNodes = function(nodes, opts) {
    if (!nodes || !Array.isArray(nodes)) {
      throw new Error(`Expected an array of nodes! Got: ${nodes}`);
    }

    return nodes.map(node => transformNode(node, opts)).join('');
  };

  Object.assign(transformInline, {
    evaluateTag,
    transformNode,
    transformNodes
  });
}

/**
 * Parses `input` and replaces every [[tag]] in it with its evaluated output.
 *
 * @param {string} input - text which may contain [[tags]].
 * @param {object} options - {strings, to}, forwarded to the replacers.
 * @returns {string} the transformed text.
 * @throws {SyntaxError} when the input contains malformed tag syntax.
 */
function transformInline(input, {strings, to}) {
  const nodes = transformInline.parse(input);
  return transformInline.transformNodes(nodes, {strings, to, input});
}