diff options
Diffstat (limited to 'src/common-util/wiki-data.js')
| -rw-r--r-- | src/common-util/wiki-data.js | 76 |
1 files changed, 57 insertions, 19 deletions
diff --git a/src/common-util/wiki-data.js b/src/common-util/wiki-data.js index 546f1ad9..1668f110 100644 --- a/src/common-util/wiki-data.js +++ b/src/common-util/wiki-data.js @@ -11,7 +11,7 @@ export {filterMultipleArrays} from './sugar.js'; // Generic value operations -export function getKebabCase(name) { +export function getCaseSensitiveKebabCase(name) { return name // Spaces to dashes @@ -34,6 +34,9 @@ export function getKebabCase(name) { // General punctuation which always separates surrounding words .replace(/[/@#$%*()_=,[\]{}|\\;:<>?`~]/g, '-') + // More punctuation which always separates surrounding words + .replace(/[\u{2013}-\u{2014}]/u, '-') // En Dash, Em Dash + // Accented characters .replace(/[áâäàå]/gi, 'a') .replace(/[çč]/gi, 'c') @@ -50,9 +53,10 @@ export function getKebabCase(name) { // Trim dashes on boundaries .replace(/^-+|-+$/g, '') +} - // Always lowercase - .toLowerCase(); +export function getKebabCase(name) { + return getCaseSensitiveKebabCase(name).toLowerCase(); } // Specific data utilities @@ -102,6 +106,8 @@ export const commentaryRegexCaseSensitive = export const commentaryRegexCaseSensitiveOneShot = new RegExp(commentaryRegexRaw); +export const languageOptionRegex = /{(?<name>[A-Z0-9_]+)}/g; + // The #validators function isOldStyleLyrics() describes // what this regular expression detects against. export const multipleLyricsDetectionRegex = @@ -113,10 +119,16 @@ export function matchContentEntries(sourceText) { let previousMatchEntry = null; let previousEndIndex = null; + const trimBody = body => + body + .replace(/^\n*/, '') + .replace(/\n*$/, ''); + for (const {0: matchText, index: startIndex, groups: matchEntry} of sourceText.matchAll(commentaryRegexCaseSensitive)) { if (previousMatchEntry) { - previousMatchEntry.body = sourceText.slice(previousEndIndex, startIndex); + previousMatchEntry.body = + trimBody(sourceText.slice(previousEndIndex, startIndex)); } matchEntries.push(matchEntry); @@ -126,7 +138,8 @@ export function matchContentEntries(sourceText) { } if (previousMatchEntry) { - previousMatchEntry.body = sourceText.slice(previousEndIndex); + previousMatchEntry.body = + trimBody(sourceText.slice(previousEndIndex)); } return matchEntries; @@ -526,26 +539,51 @@ export function combineWikiDataArrays(arrays) { // Markdown stuff export function* matchMarkdownLinks(markdownSource, {marked}) { - const plausibleLinkRegexp = /\[.*?\)/g; + const plausibleLinkRegexp = /\[(?=.*?\))/g; + + const lexer = new marked.Lexer(); + + // This is just an optimization. Don't let Marked try to process tokens + // recursively, i.e. within the text/label of the link. We only care about + // the text itself, as a string. + lexer.inlineTokens = x => []; + + // This is cheating, because the lexer's tokenizer is a private property, + // but we can apparently access it anyway. + const {tokenizer} = lexer; let plausibleMatch = null; while (plausibleMatch = plausibleLinkRegexp.exec(markdownSource)) { - // Pedantic rules use more particular parentheses detection in link - // destinations - they allow one level of balanced parentheses, and - // otherwise, parentheses must be escaped. This allows for entire links - // to be wrapped in parentheses, e.g below: - // - // This is so cool. ([You know??](https://example.com)) - // + const {index} = plausibleMatch; + const definiteMatch = - marked.Lexer.rules.inline.pedantic.link - .exec(markdownSource.slice(plausibleMatch.index)); + tokenizer.link(markdownSource.slice(index)); + + if (!definiteMatch) { + continue; + } - if (definiteMatch) { - const [{length}, label, href] = definiteMatch; - const index = plausibleMatch.index + definiteMatch.index; + const {raw: {length}, text: label, href} = definiteMatch; - yield {label, href, index, length}; + yield {label, href, index, length}; + } +} + +export function* matchInlineLinks(source) { + const plausibleLinkRegexp = /\b[a-z]*:\/\/[^ ]*?(?=(?:[,.!?]*)(?:\s|$))/gm; + + let plausibleMatch = null; + while (plausibleMatch = plausibleLinkRegexp.exec(source)) { + const [href] = plausibleMatch; + const {index} = plausibleMatch; + const [{length}] = plausibleMatch; + + try { + new URL(href); + } catch { + continue; } + + yield {href, length, index}; } } |