diff options
author | (quasar) nebula <towerofnix@gmail.com> | 2021-04-15 15:48:03 -0300 |
---|---|---|
committer | (quasar) nebula <towerofnix@gmail.com> | 2021-04-15 15:48:03 -0300 |
commit | e3baf8b7e3834b27a6ea6516e43d797570ff4c92 (patch) | |
tree | 3218cdcda2c13169981eec2ae1fda8fa1ed05641 | |
parent | fbfe596c99924431b3e7105ab3c568bbf26f3fef (diff) |
use regexes, avoid parse by letter (cur: 110k/sec)
-rw-r--r-- | upd8-util.js | 8 | ||||
-rwxr-xr-x | upd8.js | 88 |
2 files changed, 64 insertions, 32 deletions
diff --git a/upd8-util.js b/upd8-util.js index 30260f8d..3293d68a 100644 --- a/upd8-util.js +++ b/upd8-util.js @@ -427,3 +427,11 @@ module.exports.makeExtendedRegExp = (inputPatternStr, flags) => { .replace(/(^|[^\\])\s+/g, '$1'); return new RegExp(cleanedPatternStr, flags); }; + + +// Stolen from here: https://stackoverflow.com/a/3561711 +// +// There's a proposal for a native JS function like this, 8ut it's not even +// past stage 1 yet: https://github.com/tc39/proposal-regex-escaping +module.exports.escapeRegex = string => + string.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&'); diff --git a/upd8.js b/upd8.js index b1eb8966..72031b80 100755 --- a/upd8.js +++ b/upd8.js @@ -111,6 +111,7 @@ const { chunkByProperties, curry, decorateTime, + escapeRegex, filterEmptyLines, joinNoOxford, mapInPlace, @@ -979,6 +980,15 @@ const replacerSpec = { const tagArgumentValue = '='; const tagLabel = '|'; + const R_tagBeginning = escapeRegex(tagBeginning); + const R_tagEnding = escapeRegex(tagEnding); + const R_tagReplacerValue = escapeRegex(tagReplacerValue); + const R_tagArgument = escapeRegex(tagArgument); + const R_tagArgumentValue = escapeRegex(tagArgumentValue); + const R_tagLabel = escapeRegex(tagLabel); + + const regexpCache = {}; + const makeError = (i, message) => ({i, type: 'error', data: {message}}); const endOfInput = (i, comment) => makeError(i, `Unexpected end of input (${comment}).`); @@ -1009,45 +1019,62 @@ const replacerSpec = { } }; + const literalsToMatch = stopAt ? stopAt.concat([R_tagBeginning]) : [R_tagBeginning]; + + // The 8ackslash stuff here is to only match an even (or zero) num8er + // of sequential 'slashes. Even amounts always cancel out! Odd amounts + // don't, which would mean the following literal is 8eing escaped and + // should 8e counted only as part of the current string/text. + // + // Inspired 8y this: https://stackoverflow.com/a/41470813 + const regexpSource = `(?<!\\\\)(?:\\\\{2})*(${literalsToMatch.join('|')})`; + + // There are 8asically only a few regular expressions we'll ever use, + // 8ut it's a pain to hard-code them all, so we dynamically gener8te + // and cache them for reuse instead. + let regexp; + if (regexpCache.hasOwnProperty(regexpSource)) { + regexp = regexpCache[regexpSource]; + } else { + regexp = new RegExp(regexpSource); + regexpCache[regexpSource] = regexp; + } + while (i < input.length) { - if (escapeNext) { - string += input[i]; - i++; - continue; - } + const match = input.slice(i).match(regexp); - if (input[i] === '\\') { - escapeNext = true; - i++; - continue; + if (!match) { + break; } - if (stopAt) { - for (const literal of stopAt) { - if (input.slice(i, i + literal.length) === literal) { - pushTextNode(); - stopped = true; - stop_iMatch = i; - stop_iParse = i + literal.length; - stop_literal = literal; - return nodes; - } - } + const closestMatch = match[0]; + const closestMatchIndex = i + match.index; + + iString = i; + string = input.slice(i, closestMatchIndex); + pushTextNode(); + + i = closestMatchIndex + closestMatch.length; + + if (closestMatch !== tagBeginning) { + stopped = true; + stop_iMatch = closestMatchIndex; + stop_iParse = i; + stop_literal = closestMatch; + return nodes; } - if (input.slice(i, i + tagBeginning.length) === tagBeginning) { + if (closestMatch === tagBeginning) { if (textOnly) throw makeError(i, `Unexpected [[tag]] - expected only text here.`); - pushTextNode(); - const iTag = i; - i += tagBeginning.length; + const iTag = closestMatchIndex; let N; // Replacer key (or value) - N = parseOneTextNode(input, i, [tagReplacerValue, tagArgument, tagLabel, tagEnding]); + N = parseOneTextNode(input, i, [R_tagReplacerValue, R_tagArgument, R_tagLabel, R_tagEnding]); if (!stopped) throw endOfInput(i, `reading replacer key`); @@ -1070,7 +1097,7 @@ const replacerSpec = { let replacerSecond; if (stop_literal === tagReplacerValue) { - N = parseNodes(input, i, [tagArgument, tagLabel, tagEnding]); + N = parseNodes(input, i, [R_tagArgument, R_tagLabel, R_tagEnding]); if (!stopped) throw endOfInput(i, `reading replacer value`); if (!N.length) throw makeError(i, `Expected content (replacer value).`); @@ -1094,7 +1121,7 @@ const replacerSpec = { const args = []; while (stop_literal === tagArgument) { - N = parseOneTextNode(input, i, [tagArgumentValue, tagArgument, tagLabel, tagEnding]); + N = parseOneTextNode(input, i, [R_tagArgumentValue, R_tagArgument, R_tagLabel, R_tagEnding]); if (!stopped) throw endOfInput(i, `reading argument key`); @@ -1107,7 +1134,7 @@ const replacerSpec = { const key = N; i = stop_iParse; - N = parseNodes(input, i, [tagArgument, tagLabel, tagEnding]); + N = parseNodes(input, i, [R_tagArgument, R_tagLabel, R_tagEnding]); if (!stopped) throw endOfInput(i, `reading argument value`); if (!N.length) throw makeError(i, `Expected content (argument value).`); @@ -1121,7 +1148,7 @@ const replacerSpec = { let label; if (stop_literal === tagLabel) { - N = parseOneTextNode(input, i, [tagEnding]); + N = parseOneTextNode(input, i, [R_tagEnding]); if (!stopped) throw endOfInput(i, `reading label`); if (!N) throw makeError(i, `Expected text (label).`); @@ -1134,9 +1161,6 @@ const replacerSpec = { continue; } - - string += input[i]; - i++; } pushTextNode(); |