// NOTE(review): this text is a line-collapsed copy of a git patch that adds
// src/replacer.js (each source line still carries its diff `+` marker), and
// the extraction appears to have dropped text lying between a `<` and a later
// `>`. The code below is left untouched; these notes describe only what is
// still visible.
//
// replacerSpec: table keyed by replacer name ('album', 'artist', 'date',
// 'flash', 'string', 'track', ...). Every entry has a `find` value (a name
// string or null) and either a `link` name string or a `value`/`html`
// function pair.
//   - 'date': value(ref) builds `new Date(ref)`; html() wraps
//     language.formatDate(date) in a 'time' tag whose datetime attribute is
//     date.toUTCString().
//     NOTE(review): the toUTCString() format does not look like one of the
//     formats HTML documents for the datetime attribute; toISOString() may
//     be what is wanted - confirm nothing relies on the current output.
//   - 'flash': transformName(name, node, input) returns name without its
//     trailing '.' when name ends in '.' and the input character at
//     node.iEnd is not ' ', '\n' or '<'; otherwise it returns name as is.
//   - 'string': value(ref) returns ref unchanged; html() returns
//     language.$(ref, args).
//
// Syntax literals: '[[' and ']]' delimit a tag; ':', '#', '*', '=' and '|'
// are, going by the constant names, the replacer-value, hash, argument,
// argument-value and label markers.
//
// NOTE(review): the statement starting `const noPrecedingWhitespace = '(?`
// is cut off and runs straight into the tail of an arrow function returning
// {i, type: 'error', data: {message}}. Since endOfInput calls
// makeError(i, message), that tail is presumably makeError; the declarations
// lost in between need to be restored from the original file.
//
// endOfInput(i, comment): makeError at index i with the message
// `Unexpected end of input (${comment}).`
From 16286da93ad64ab3d944d02bb9faa7a7310e0ce1 Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Sun, 26 Jan 2025 17:16:25 -0400 Subject: move some modules out of util, data --- src/replacer.js | 852 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 852 insertions(+) create mode 100644 src/replacer.js (limited to 'src/replacer.js') diff --git a/src/replacer.js b/src/replacer.js new file mode 100644 index 00000000..e3f5623e --- /dev/null +++ b/src/replacer.js @@ -0,0 +1,852 @@ +// Regex-based forward parser for wiki content, breaking up text input into +// text and (possibly nested) tag nodes. +// +// The behavior here is quite tied into the `transformContent` content +// function, which converts nodes parsed here into actual HTML, links, etc +// for embedding in a wiki webpage. + +import * as marked from 'marked'; + +import * as html from '#html'; +import {escapeRegex, typeAppearance} from '#sugar'; + +export const replacerSpec = { + 'album': { + find: 'album', + link: 'linkAlbumDynamically', + }, + + 'album-commentary': { + find: 'album', + link: 'linkAlbumCommentary', + }, + + 'album-gallery': { + find: 'album', + link: 'linkAlbumGallery', + }, + + 'artist': { + find: 'artist', + link: 'linkArtist', + }, + + 'artist-gallery': { + find: 'artist', + link: 'linkArtistGallery', + }, + + 'commentary-index': { + find: null, + link: 'linkCommentaryIndex', + }, + + 'date': { + find: null, + value: (ref) => new Date(ref), + html: (date, {html, language}) => + html.tag('time', + {datetime: date.toUTCString()}, + language.formatDate(date)), + }, + + 'flash-index': { + find: null, + link: 'linkFlashIndex', + }, + + 'flash': { + find: 'flash', + link: 'linkFlash', + transformName(name, node, input) { + const nextCharacter = input[node.iEnd]; + const lastCharacter = name[name.length - 1]; + if (![' ', '\n', '<'].includes(nextCharacter) && lastCharacter === '.') { + return name.slice(0, -1); + } else { + return name; + } + }, + }, + + 'flash-act': { + 
find: 'flashAct', + link: 'linkFlashAct', + }, + + 'group': { + find: 'group', + link: 'linkGroup', + }, + + 'group-gallery': { + find: 'group', + link: 'linkGroupGallery', + }, + + 'home': { + find: null, + link: 'linkWikiHome', + }, + + 'listing-index': { + find: null, + link: 'linkListingIndex', + }, + + 'listing': { + find: 'listing', + link: 'linkListing', + }, + + 'media': { + find: null, + link: 'linkPathFromMedia', + }, + + 'news-index': { + find: null, + link: 'linkNewsIndex', + }, + + 'news-entry': { + find: 'newsEntry', + link: 'linkNewsEntry', + }, + + 'root': { + find: null, + link: 'linkPathFromRoot', + }, + + 'site': { + find: null, + link: 'linkPathFromSite', + }, + + 'static': { + find: 'staticPage', + link: 'linkStaticPage', + }, + + 'string': { + find: null, + value: (ref) => ref, + html: (ref, {language, args}) => language.$(ref, args), + }, + + 'tag': { + find: 'artTag', + link: 'linkArtTag', + }, + + 'track': { + find: 'track', + link: 'linkTrackDynamically', + }, +}; + +// Syntax literals. +const tagBeginning = '[['; +const tagEnding = ']]'; +const tagReplacerValue = ':'; +const tagHash = '#'; +const tagArgument = '*'; +const tagArgumentValue = '='; +const tagLabel = '|'; + +const noPrecedingWhitespace = '(? ({i, type: 'error', data: {message}}); +const endOfInput = (i, comment) => + makeError(i, `Unexpected end of input (${comment}).`); + +// These are 8asically stored on the glo8al scope, which might seem odd +// for a recursive function, 8ut the values are only ever used immediately +// after they're set. 
// NOTE(review): this span is part of a line-collapsed copy of a git patch
// (diff `+` markers intact) in which text between a `<` and a later `>`
// appears to have been dropped, so several definitions below are incomplete.
// The code is left byte-identical; these notes cover only what is visible.
//
// stopped / stop_iParse / stop_literal: module-level variables. parseNodes
// resets `stopped` to false on entry; where the other two are written is not
// visible here.
//
// parseOneTextNode(input, i, stopAt): returns the first element of
// parseNodes(input, i, stopAt, true).
//
// parseNodes(input, i, stopAt, textOnly): only the opening survives.
//   - pushTextNode(isLast) slices input from iString to i, applies trimEnd()
//     when isLast is truthy, passes the result through cleanRawText, and
//     pushes {i: iString, iEnd: i, type: 'text', data} if anything is left.
//   - literalsToMatch is stopAt with R_tagBeginning appended, or just
//     [R_tagBeginning] when stopAt is falsy.
//   NOTE(review): everything from the `regexpSource` template literal up to
//   the closing `/g, '$1');` of some replace() call is missing, which takes
//   the rest of parseNodes and whatever was defined after it with it.
//
// restoreRawHTMLTags(text): a single text.replace() whose replacement is
// '<$1'. NOTE(review): its regex literal is garbled (`/])/g`).
//
// cleanRawText(text): squashBackslashes (definition not visible here), then
// restoreRawHTMLTags.
//
// postprocessComments(inputNodes): non-text nodes are passed through as is;
// each text node becomes a new {type: 'text', data, i, iEnd} node whose data
// has every match of commentRegexp (flags 'gm') removed. Per the inline
// comments the three alternatives cover whole-line comments, comments at the
// start of a line, and comments anywhere else. NOTE(review): the three
// String.raw pattern pieces are garbled.
//
// postprocessImages(inputNodes): for each text node, emits the text before
// every imageRegexp match as a text node followed by a {type: 'image'} node
// built from html.parseAttributes(match[1]), then any remaining text.
// Nodes of other types are pushed unchanged ('tag' nodes also clear
// atStartOfLine).
//   - image.src comes from the 'src' attribute; link, style, align are
//     copied when set; width and height go through parseInt; pixelate is set
//     to true when present; 'warning' is split on ', ' into image.warnings.
//   - image.inline is true when the 'inline' attribute is set, when
//     atStartOfLine is false, when more text follows in this node and it
//     does not begin with '\n', or when the node ends right after the image
//     and is not the last input node; otherwise false.
//   NOTE(review): `const imageRegexp = //g;` - the literal is garbled; as
//   written it reads as a line comment.
//   NOTE(review): the text node pushed before each image uses
//   `iEnd: node.i + parseFrom + match.index`, but match.index comes from
//   exec(node.data) and is already an offset into node.data, so parseFrom
//   looks double-counted; `node.i + match.index` seems intended - confirm.
//   NOTE(review): parseInt() is called without a radix for width/height.
//
// postprocessVideos(inputNodes): cut off mid-statement at the end of this
// excerpt, so it is not documented here.
+let stopped, stop_iParse, stop_literal; + +function parseOneTextNode(input, i, stopAt) { + return parseNodes(input, i, stopAt, true)[0]; +} + +function parseNodes(input, i, stopAt, textOnly) { + let nodes = []; + let string = ''; + let iString = 0; + + stopped = false; + + const pushTextNode = (isLast) => { + string = input.slice(iString, i); + + // If this is the last text node 8efore stopping (at a stopAt match + // or the end of the input), trim off whitespace at the end. + if (isLast) { + string = string.trimEnd(); + } + + string = cleanRawText(string); + + if (string.length) { + nodes.push({i: iString, iEnd: i, type: 'text', data: string}); + string = ''; + } + }; + + const literalsToMatch = stopAt + ? stopAt.concat([R_tagBeginning]) + : [R_tagBeginning]; + + // The 8ackslash stuff here is to only match an even (or zero) num8er + // of sequential 'slashes. Even amounts always cancel out! Odd amounts + // don't, which would mean the following literal is 8eing escaped and + // should 8e counted only as part of the current string/text. + // + // Inspired 8y this: https://stackoverflow.com/a/41470813 + const regexpSource = `(?-])/g, '$1'); +} + +export function restoreRawHTMLTags(text) { + // Replace stuff like with ; these signal that + // the tag shouldn't be processed by the replacer system, + // and should just be embedded into the content as raw HTML. + return text.replace(/])/g, '<$1'); +} + +export function cleanRawText(text) { + text = squashBackslashes(text); + text = restoreRawHTMLTags(text); + return text; +} + +export function postprocessComments(inputNodes) { + const outputNodes = []; + + for (const node of inputNodes) { + if (node.type !== 'text') { + outputNodes.push(node); + continue; + } + + const commentRegexp = + new RegExp( + (// Remove comments which occupy entire lines, trimming the line break + // leading into them. 
These comments never include the ending of a + // comment which does not end a line, which is a regex way of saying + // "please fail early if we hit a --> that doesn't happen at the end + // of the line". + String.raw`\n(?!$))[\s\S])*?-->(?=$)` + + '|' + + + // Remove comments which appear at the start of a line, and any + // following spaces. + String.raw`^ *` + + + '|' + + + // Remove comments which appear anywhere else, including in the + // middle of a line or at the end of a line, and any leading spaces. + String.raw` *`), + + 'gm'); + + outputNodes.push({ + type: 'text', + + data: + node.data.replace(commentRegexp, ''), + + i: node.i, + iEnd: node.iEnd, + }); + } + + return outputNodes; +} + +export function postprocessImages(inputNodes) { + const outputNodes = []; + + let atStartOfLine = true; + + const lastNode = inputNodes.at(-1); + + for (const node of inputNodes) { + if (node.type === 'tag') { + atStartOfLine = false; + } + + if (node.type === 'text') { + const imageRegexp = //g; + + let match = null, parseFrom = 0; + while (match = imageRegexp.exec(node.data)) { + const previousText = node.data.slice(parseFrom, match.index); + + outputNodes.push({ + type: 'text', + data: previousText, + i: node.i + parseFrom, + iEnd: node.i + parseFrom + match.index, + }); + + parseFrom = match.index + match[0].length; + + const imageNode = {type: 'image'}; + const attributes = html.parseAttributes(match[1]); + + imageNode.src = attributes.get('src'); + + if (previousText.endsWith('\n')) { + atStartOfLine = true; + } else if (previousText.length) { + atStartOfLine = false; + } + + imageNode.inline = (() => { + // Images can force themselves to be rendered inline using a custom + // attribute - this style just works better for certain embeds, + // usually jokes or small images. + if (attributes.get('inline')) return true; + + // If we've already determined we're in the middle of a line, + // we're inline. (Of course!) 
+ if (!atStartOfLine) { + return true; + } + + // If there's more text to go in this text node, and what's + // remaining doesn't start with a line break, we're inline. + if ( + parseFrom !== node.data.length && + node.data[parseFrom] !== '\n' + ) { + return true; + } + + // If we're at the end of this text node, but this text node + // isn't the last node overall, we're inline. + if ( + parseFrom === node.data.length && + node !== lastNode + ) { + return true; + } + + // If no other condition matches, this image is on its own line. + return false; + })(); + + if (attributes.get('link')) imageNode.link = attributes.get('link'); + if (attributes.get('style')) imageNode.style = attributes.get('style'); + if (attributes.get('width')) imageNode.width = parseInt(attributes.get('width')); + if (attributes.get('height')) imageNode.height = parseInt(attributes.get('height')); + if (attributes.get('align')) imageNode.align = attributes.get('align'); + if (attributes.get('pixelate')) imageNode.pixelate = true; + + if (attributes.get('warning')) { + imageNode.warnings = + attributes.get('warning').split(', '); + } + + outputNodes.push(imageNode); + + // No longer at the start of a line after an image - there will at + // least be a text node with only '\n' before the next image that's + // on its own line. + atStartOfLine = false; + } + + if (parseFrom !== node.data.length) { + outputNodes.push({ + type: 'text', + data: node.data.slice(parseFrom), + i: node.i + parseFrom, + iEnd: node.iEnd, + }); + } + + continue; + } + + outputNodes.push(node); + } + + return outputNodes; +} + +export function postprocessVideos(inputNodes) { + const outputNodes = []; + + for (const node of inputNodes) { + if (node.type !== 'text') { + outputNodes.push(node); + continue; + } + + const videoRegexp = /