diff options
Diffstat (limited to 'src/util/replacer.js')
-rw-r--r-- | src/util/replacer.js | 549 |
1 files changed, 400 insertions, 149 deletions
diff --git a/src/util/replacer.js b/src/util/replacer.js index 9d602ca..d1b0a26 100644 --- a/src/util/replacer.js +++ b/src/util/replacer.js @@ -1,22 +1,150 @@ -import {logError, logWarn} from './cli.js'; -import {escapeRegex} from './sugar.js'; - -export function validateReplacerSpec(replacerSpec, {find, link}) { - let success = true; - - for (const [key, {link: linkKey, find: findKey, html}] of Object.entries(replacerSpec)) { - if (!html && !link[linkKey]) { - logError`The replacer spec ${key} has invalid link key ${linkKey}! Specify it in link specs or fix typo.`; - success = false; - } - if (findKey && !find[findKey]) { - logError`The replacer spec ${key} has invalid find key ${findKey}! Specify it in find specs or fix typo.`; - success = false; - } - } - - return success; -} +// Regex-based forward parser for wiki content, breaking up text input into +// text and (possibly nested) tag nodes. +// +// The behavior here is quite tied into the `transformContent` content +// function, which converts nodes parsed here into actual HTML, links, etc +// for embedding in a wiki webpage. 
// Table of the replacer keys recognized inside wiki content tags. Each entry
// is a plain object using some of these fields:
//
//   find           Name of a find function used to resolve the tag's
//                  reference, or null when no lookup applies.
//   link           Name of the link content function used to render the tag.
//   value          (ref) => value. Computes the tag's value directly from
//                  the written reference.
//   html           (value, opts) => content. Renders the value directly,
//                  for entries which have no `link`.
//   transformName  (name, node, input) => name. Adjusts the default label
//                  based on where the tag sits in the input.
//
// NOTE(review): this table is presumably consumed by the `transformContent`
// content function mentioned at the top of the file - confirm there before
// renaming or removing any field.
export const replacerSpec = {
  'album': {
    find: 'album',
    link: 'linkAlbum',
  },

  'album-commentary': {
    find: 'album',
    link: 'linkAlbumCommentary',
  },

  'album-gallery': {
    find: 'album',
    link: 'linkAlbumGallery',
  },

  'artist': {
    find: 'artist',
    link: 'linkArtist',
  },

  'artist-gallery': {
    find: 'artist',
    link: 'linkArtistGallery',
  },

  'commentary-index': {
    find: null,
    link: 'linkCommentaryIndex',
  },

  'date': {
    find: null,
    value: (ref) => new Date(ref),
    html: (date, {html, language}) =>
      html.tag('time',
        // The datetime attribute must hold a machine-readable value, which
        // the RFC 1123 text from toUTCString() is not - use ISO 8601.
        // Note that toISOString() throws a RangeError for an invalid Date.
        {datetime: date.toISOString()},
        language.formatDate(date)),
  },

  'flash-index': {
    find: null,
    link: 'linkFlashIndex',
  },

  'flash': {
    find: 'flash',
    link: 'linkFlash',

    // Drops a trailing period from the name, unless the tag is directly
    // followed by a space, a line break, or the start of an HTML tag.
    // At the very end of the input there is no following character at all,
    // so the period is dropped there too.
    transformName(name, node, input) {
      const nextCharacter = input[node.iEnd];
      const lastCharacter = name[name.length - 1];
      if (![' ', '\n', '<'].includes(nextCharacter) && lastCharacter === '.') {
        return name.slice(0, -1);
      } else {
        return name;
      }
    },
  },

  'flash-act': {
    find: 'flashAct',
    link: 'linkFlashAct',
  },

  'group': {
    find: 'group',
    link: 'linkGroup',
  },

  'group-gallery': {
    find: 'group',
    link: 'linkGroupGallery',
  },

  'home': {
    find: null,
    link: 'linkWikiHome',
  },

  'listing-index': {
    find: null,
    link: 'linkListingIndex',
  },

  'listing': {
    find: 'listing',
    link: 'linkListing',
  },

  'media': {
    find: null,
    link: 'linkPathFromMedia',
  },

  'news-index': {
    find: null,
    link: 'linkNewsIndex',
  },

  'news-entry': {
    find: 'newsEntry',
    link: 'linkNewsEntry',
  },

  'root': {
    find: null,
    link: 'linkPathFromRoot',
  },

  'site': {
    find: null,
    link: 'linkPathFromSite',
  },

  'static': {
    find: 'staticPage',
    link: 'linkStaticPage',
  },

  'string': {
    find: null,
    value: (ref) => ref,
    html: (ref, {language, args}) => language.$(ref, args),
  },

  'tag': {
    find: 'artTag',
    link: 'linkArtTag',
  },

  'track': {
    find: 'track',
    link: 'linkTrackDynamically',
  },
};
/**
 * Splits `<img ...>` tags out of text nodes into dedicated image nodes.
 *
 * Non-text nodes are passed through untouched. Each text node is replaced by
 * an alternating run of text nodes (the content around the images) and
 * `{type: 'image'}` nodes. The text node emitted before an image may have
 * empty data.
 *
 * Image nodes always carry `src` and `inline`, and carry `link`, `style`,
 * `width`, `height`, `align`, `pixelate` and `warnings` only when the
 * matching attribute is present with a truthy value. Note that the
 * `warning` attribute becomes the `warnings` array, split on ', '.
 *
 * @param {object[]} inputNodes - nodes as produced by parseNodes.
 * @returns {object[]} a new array of nodes; the input array is not modified.
 */
export function postprocessImages(inputNodes) {
  const outputNodes = [];
  const lastNode = inputNodes.at(-1);

  // Tracks whether the next image would be the first thing on its line.
  let atStartOfLine = true;

  for (const node of inputNodes) {
    if (node.type === 'tag') {
      atStartOfLine = false;
    }

    if (node.type !== 'text') {
      outputNodes.push(node);
      continue;
    }

    let parseFrom = 0;

    for (const match of node.data.matchAll(/<img (.*?)>/g)) {
      const previousText = node.data.slice(parseFrom, match.index);

      // Both parseFrom and match.index are offsets from the start of this
      // node's data, so each is added to node.i on its own - never summed.
      outputNodes.push({
        type: 'text',
        data: previousText,
        i: node.i + parseFrom,
        iEnd: node.i + match.index,
      });

      parseFrom = match.index + match[0].length;

      const attributes = html.parseAttributes(match[1]);
      const imageNode = {type: 'image'};

      imageNode.src = attributes.get('src');

      if (previousText.endsWith('\n')) {
        atStartOfLine = true;
      } else if (previousText.length) {
        atStartOfLine = false;
      }

      imageNode.inline = (() => {
        // Images can force themselves to be rendered inline using a custom
        // attribute - this style just works better for certain embeds,
        // usually jokes or small images.
        if (attributes.get('inline')) return true;

        // If we've already determined we're in the middle of a line,
        // we're inline.
        if (!atStartOfLine) return true;

        const atEndOfText = parseFrom === node.data.length;

        // If there's more text to go in this text node, and what's
        // remaining doesn't start with a line break, we're inline.
        if (!atEndOfText && node.data[parseFrom] !== '\n') return true;

        // If we're at the end of this text node, but this text node
        // isn't the last node overall, we're inline.
        if (atEndOfText && node !== lastNode) return true;

        // If no other condition matches, this image is on its own line.
        return false;
      })();

      if (attributes.get('link')) imageNode.link = attributes.get('link');
      if (attributes.get('style')) imageNode.style = attributes.get('style');
      if (attributes.get('width')) imageNode.width = Number.parseInt(attributes.get('width'), 10);
      if (attributes.get('height')) imageNode.height = Number.parseInt(attributes.get('height'), 10);
      if (attributes.get('align')) imageNode.align = attributes.get('align');
      if (attributes.get('pixelate')) imageNode.pixelate = true;

      if (attributes.get('warning')) {
        imageNode.warnings =
          attributes.get('warning').split(', ');
      }

      outputNodes.push(imageNode);

      // No longer at the start of a line after an image - there will at
      // least be a text node with only '\n' before the next image that's
      // on its own line.
      atStartOfLine = false;
    }

    if (parseFrom !== node.data.length) {
      outputNodes.push({
        type: 'text',
        data: node.data.slice(parseFrom),
        i: node.i + parseFrom,
        iEnd: node.iEnd,
      });
    }
  }

  return outputNodes;
}
/**
 * Splits markdown-style links (`[label](href)`) out of text nodes into
 * dedicated `external-link` nodes.
 *
 * Non-text nodes are passed through untouched. Each text node is replaced by
 * the text around its links (empty stretches are skipped) interleaved with
 * `{type: 'external-link', data: {label, href}}` nodes. Every emitted node
 * carries `i` / `iEnd`, computed from the source node's `i` plus the offset
 * within its data.
 *
 * @param {object[]} inputNodes - nodes as produced by the earlier passes.
 * @returns {object[]} a new array of nodes; the input array is not modified.
 */
export function postprocessExternalLinks(inputNodes) {
  const outputNodes = [];

  for (const node of inputNodes) {
    if (node.type !== 'text') {
      outputNodes.push(node);
      continue;
    }

    // Cheap first pass: anything from a '[' up to the nearest ')' on the
    // same line. A fresh literal per node keeps lastIndex state local.
    const plausibleLinkRegexp = /\[.*?\)/g;

    // Start of the text which hasn't been emitted yet.
    let textStart = 0;

    let plausibleMatch = null;
    while ((plausibleMatch = plausibleLinkRegexp.exec(node.data)) !== null) {
      // Pedantic rules use more particular parentheses detection in link
      // destinations - they allow one level of balanced parentheses, and
      // otherwise, parentheses must be escaped. This allows for entire links
      // to be wrapped in parentheses, e.g. below:
      //
      //     This is so cool. ([You know??](https://example.com))
      //
      const definiteMatch =
        marked.Lexer.rules.inline.pedantic.link
          .exec(node.data.slice(plausibleMatch.index));

      if (!definiteMatch) {
        // Not a link after all. Resume right after this '[' rather than
        // after the whole candidate, so a real link which begins inside
        // the rejected span is still found.
        plausibleLinkRegexp.lastIndex = plausibleMatch.index + 1;
        continue;
      }

      const {1: label, 2: href} = definiteMatch;

      const offset = plausibleMatch.index + definiteMatch.index;
      const length = definiteMatch[0].length;

      // Emit the text leading up to this link, if there is any.
      if (offset > textStart) {
        outputNodes.push({
          i: node.i + textStart,
          iEnd: node.i + offset,
          type: 'text',
          data: node.data.slice(textStart, offset),
        });
      }

      outputNodes.push({
        i: node.i + offset,
        iEnd: node.i + offset + length,
        type: 'external-link',
        data: {label, href},
      });

      textStart = offset + length;

      // The definite match can reach past the plausible one (balanced
      // parentheses in the destination), so continue scanning from the
      // true end of the link - and always make forward progress.
      plausibleLinkRegexp.lastIndex =
        Math.max(textStart, plausibleMatch.index + 1);
    }

    if (textStart < node.data.length) {
      outputNodes.push({
        i: node.i + textStart,
        iEnd: node.iEnd,
        type: 'text',
        data: node.data.slice(textStart),
      });
    }
  }

  return outputNodes;
}
replacerValue : replacerKey + `:` + replacerValue - ) - : { - directory: replacerValue, - name: null, - }; - - if (!value) { - logWarn`The link ${source} does not match anything!`; - return source; - } - - const enteredLabel = node.data.label && transformNode(node.data.label, opts); - - const label = - enteredLabel || - (transformName && transformName(value.name, node, input)) || - value.name; - - if (!valueFn && !label) { - logWarn`The link ${source} requires a label be entered!`; - return source; - } - - const hash = node.data.hash && transformNodes(node.data.hash, opts); - - const args = - node.data.args && - Object.fromEntries( - node.data.args.map(({key, value}) => [ - transformNode(key, opts), - transformNodes(value, opts), - ]) - ); - - const fn = htmlFn ? htmlFn : link[linkKey]; - - try { - return fn(value, {text: label, hash, args, language, to}); - } catch (error) { - logError`The link ${source} failed to be processed: ${error}`; - return source; - } -} - -function transformNode(node, opts) { - if (!node) { - throw new Error('Expected a node!'); - } - - if (Array.isArray(node)) { - throw new Error('Got an array - use transformNodes here!'); - } - - switch (node.type) { - case 'text': - return node.data; - case 'tag': - return evaluateTag(node, opts); - default: - throw new Error(`Unknown node type ${node.type}`); - } -} - -function transformNodes(nodes, opts) { - if (!nodes || !Array.isArray(nodes)) { - throw new Error(`Expected an array of nodes! 
Got: ${nodes}`); - } - - return nodes.map((node) => transformNode(node, opts)).join(''); -} - -export function transformInline(input, { - replacerSpec, - find, - language, - link, - to, - wikiData, -}) { - if (!replacerSpec) throw new Error('Expected replacerSpec'); - if (!find) throw new Error('Expected find'); - if (!language) throw new Error('Expected language'); - if (!link) throw new Error('Expected link'); - if (!to) throw new Error('Expected to'); - if (!wikiData) throw new Error('Expected wikiData'); - - const nodes = parseInput(input); - return transformNodes(nodes, { - input, - find, - link, - replacerSpec, - language, - to, - wikiData, - }); -} |