From e3baf8b7e3834b27a6ea6516e43d797570ff4c92 Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Thu, 15 Apr 2021 15:48:03 -0300 Subject: use regexes, avoid parse by letter (cur: 110k/sec) --- upd8-util.js | 8 ++++++ upd8.js | 88 ++++++++++++++++++++++++++++++++++++++---------------------- 2 files changed, 64 insertions(+), 32 deletions(-) diff --git a/upd8-util.js b/upd8-util.js index 30260f8..3293d68 100644 --- a/upd8-util.js +++ b/upd8-util.js @@ -427,3 +427,11 @@ module.exports.makeExtendedRegExp = (inputPatternStr, flags) => { .replace(/(^|[^\\])\s+/g, '$1'); return new RegExp(cleanedPatternStr, flags); }; + + +// Stolen from here: https://stackoverflow.com/a/3561711 +// +// There's a proposal for a native JS function like this, 8ut it's not even +// past stage 1 yet: https://github.com/tc39/proposal-regex-escaping +module.exports.escapeRegex = string => + string.replace(/[-\/\\^$*+?.()|[\]{}]/g, '\\$&'); diff --git a/upd8.js b/upd8.js index b1eb896..72031b8 100755 --- a/upd8.js +++ b/upd8.js @@ -111,6 +111,7 @@ const { chunkByProperties, curry, decorateTime, + escapeRegex, filterEmptyLines, joinNoOxford, mapInPlace, @@ -979,6 +980,15 @@ const replacerSpec = { const tagArgumentValue = '='; const tagLabel = '|'; + const R_tagBeginning = escapeRegex(tagBeginning); + const R_tagEnding = escapeRegex(tagEnding); + const R_tagReplacerValue = escapeRegex(tagReplacerValue); + const R_tagArgument = escapeRegex(tagArgument); + const R_tagArgumentValue = escapeRegex(tagArgumentValue); + const R_tagLabel = escapeRegex(tagLabel); + + const regexpCache = {}; + const makeError = (i, message) => ({i, type: 'error', data: {message}}); const endOfInput = (i, comment) => makeError(i, `Unexpected end of input (${comment}).`); @@ -1009,45 +1019,62 @@ const replacerSpec = { } }; + const literalsToMatch = stopAt ? stopAt.concat([R_tagBeginning]) : [R_tagBeginning]; + + // The 8ackslash stuff here is to only match an even (or zero) num8er + // of sequential 'slashes. Even amounts always cancel out! Odd amounts + // don't, which would mean the following literal is 8eing escaped and + // should 8e counted only as part of the current string/text. + // + // Inspired 8y this: https://stackoverflow.com/a/41470813 + const regexpSource = `(?