diff options
Diffstat (limited to 'src/util')
-rw-r--r-- | src/util/replacer.js | 284 | ||||
-rw-r--r-- | src/util/transform-content.js | 452 |
2 files changed, 145 insertions, 591 deletions
diff --git a/src/util/replacer.js b/src/util/replacer.js index 50a90004..7240940d 100644 --- a/src/util/replacer.js +++ b/src/util/replacer.js @@ -1,23 +1,6 @@ import {logError, logWarn} from './cli.js'; import {escapeRegex} from './sugar.js'; -export function validateReplacerSpec(replacerSpec, {find, link}) { - let success = true; - - for (const [key, {link: linkKey, find: findKey, html}] of Object.entries(replacerSpec)) { - if (!html && !link[linkKey]) { - logError`The replacer spec ${key} has invalid link key ${linkKey}! Specify it in link specs or fix typo.`; - success = false; - } - if (findKey && !find[findKey]) { - logError`The replacer spec ${key} has invalid find key ${findKey}! Specify it in find specs or fix typo.`; - success = false; - } - } - - return success; -} - // Syntax literals. const tagBeginning = '[['; const tagEnding = ']]'; @@ -292,13 +275,157 @@ function parseNodes(input, i, stopAt, textOnly) { return nodes; } +function parseAttributes(string) { + const attributes = Object.create(null); + + const skipWhitespace = i => { + if (!/\s/.test(string[i])) { + return i; + } + + const match = string.slice(i).match(/[^\s]/); + if (match) { + return i + match.index; + } + + return string.length; + }; + + for (let i = 0; i < string.length; ) { + i = skipWhitespace(i); + const aStart = i; + const aEnd = i + string.slice(i).match(/[\s=]|$/).index; + const attribute = string.slice(aStart, aEnd); + i = skipWhitespace(aEnd); + if (string[i] === '=') { + i = skipWhitespace(i + 1); + let end, endOffset; + if (string[i] === '"' || string[i] === "'") { + end = string[i]; + endOffset = 1; + i++; + } else { + end = '\\s'; + endOffset = 0; + } + const vStart = i; + const vEnd = i + string.slice(i).match(new RegExp(`${end}|$`)).index; + const value = string.slice(vStart, vEnd); + i = vEnd + endOffset; + attributes[attribute] = value; + } else { + attributes[attribute] = attribute; + } + } + + return ( + Object.fromEntries( + Object.entries(attributes) + .map(([key, val]) => [ + key, + val === 'true' + ? true + : val === 'false' + ? false + : val === key + ? true + : val, + ]))); +} + +export function postprocessImages(inputNodes) { + const outputNodes = []; + + let atStartOfLine = true; + + const lastNode = inputNodes[inputNodes.length - 1]; + + for (const node of inputNodes) { + if (node.type === 'tag') { + atStartOfLine = false; + } + + if (node.type === 'text') { + const imageRegexp = /<img (.*?)>/g; + + let match = null, parseFrom = 0; + while (match = imageRegexp.exec(node.data)) { + const previousText = node.data.slice(parseFrom, match.index); + outputNodes.push({type: 'text', data: previousText}); + parseFrom = match.index + match[0].length; + + const imageNode = {type: 'image'}; + const attributes = parseAttributes(match[1]); + + imageNode.src = attributes.src; + + if (previousText.endsWith('\n')) { + atStartOfLine = true; + } + + imageNode.inline = (() => { + // If we've already determined we're in the middle of a line, + // we're inline. (Of course!) + if (!atStartOfLine) { + return true; + } + + // If there's more text to go in this text node, and what's + // remaining doesn't start with a line break, we're inline. + if ( + parseFrom !== node.data.length && + node.data[parseFrom] !== '\n' + ) { + return true; + } + + // If we're at the end of this text node, but this text node + // isn't the last node overall, we're inline. + if ( + parseFrom === node.data.length && + node !== lastNode + ) { + return true; + } + + // If no other condition matches, this image is on its own line. + return false; + })(); + + if (attributes.width) imageNode.width = parseInt(attributes.width); + if (attributes.height) imageNode.height = parseInt(attributes.height); + + outputNodes.push(imageNode); + + // No longer at the start of a line after an image - there will at + // least be a text node with only '\n' before the next image that's + // on its own line. + atStartOfLine = false; + } + + if (parseFrom !== node.data.length) { + outputNodes.push({ + type: 'text', + data: node.data.slice(parseFrom), + }); + } + + continue; + } + + outputNodes.push(node); + } + + return outputNodes; +} + export function parseInput(input) { if (typeof input !== 'string') { throw new TypeError(`Expected input to be string, got ${input}`); } try { - return parseNodes(input, 0); + return postprocessImages(parseNodes(input, 0)); } catch (errorNode) { if (errorNode.type !== 'error') { throw errorNode; @@ -334,124 +461,3 @@ export function parseInput(input) { ].join('\n')); } } - -function evaluateTag(node, opts) { - const {find, input, language, link, replacerSpec, to} = opts; - - const source = input.slice(node.i, node.iEnd); - - const replacerKeyImplied = !node.data.replacerKey; - const replacerKey = replacerKeyImplied ? 'track' : node.data.replacerKey.data; - - if (!replacerSpec[replacerKey]) { - logWarn`The link ${source} has an invalid replacer key!`; - return source; - } - - const { - find: findKey, - link: linkKey, - value: valueFn, - html: htmlFn, - transformName, - } = replacerSpec[replacerKey]; - - const replacerValue = transformNodes(node.data.replacerValue, opts); - - const value = valueFn - ? valueFn(replacerValue) - : findKey - ? find[findKey]( - replacerKeyImplied ? replacerValue : replacerKey + `:` + replacerValue - ) - : { - directory: replacerValue, - name: null, - }; - - if (!value) { - logWarn`The link ${source} does not match anything!`; - return source; - } - - const enteredLabel = node.data.label && transformNode(node.data.label, opts); - - const label = - enteredLabel || - (transformName && transformName(value.name, node, input)) || - null; - - const hash = node.data.hash && transformNode(node.data.hash, opts); - - const args = - node.data.args && - Object.fromEntries( - node.data.args.map(({key, value}) => [ - transformNode(key, opts), - transformNodes(value, opts), - ]) - ); - - const fn = htmlFn ? htmlFn : link[linkKey]; - - try { - return fn(value, {text: label, hash, args, language, to}); - } catch (error) { - logError`The link ${source} failed to be processed: ${error}`; - return source; - } -} - -function transformNode(node, opts) { - if (!node) { - throw new Error('Expected a node!'); - } - - if (Array.isArray(node)) { - throw new Error('Got an array - use transformNodes here!'); - } - - switch (node.type) { - case 'text': - return node.data; - case 'tag': - return evaluateTag(node, opts); - default: - throw new Error(`Unknown node type ${node.type}`); - } -} - -function transformNodes(nodes, opts) { - if (!nodes || !Array.isArray(nodes)) { - throw new Error(`Expected an array of nodes! Got: ${nodes}`); - } - - return nodes.map((node) => transformNode(node, opts)).join(''); -} - -export function transformInline(input, { - replacerSpec, - find, - language, - link, - to, - wikiData, -}) { - if (!replacerSpec) throw new Error('Expected replacerSpec'); - if (!find) throw new Error('Expected find'); - if (!language) throw new Error('Expected language'); - if (!link) throw new Error('Expected link'); - if (!to) throw new Error('Expected to'); - if (!wikiData) throw new Error('Expected wikiData'); - - const nodes = parseInput(input); - return transformNodes(nodes, { - input, - find, - link, - replacerSpec, - language, - to, - wikiData, - }); -} diff --git a/src/util/transform-content.js b/src/util/transform-content.js deleted file mode 100644 index 454cb374..00000000 --- a/src/util/transform-content.js +++ /dev/null @@ -1,452 +0,0 @@ -// See also replacer.js, which covers the actual syntax parser and node -// interpreter. This file works with replacer.js to provide higher-level -// interfaces for converting various content found in wiki data to HTML for -// display on the site. - -export {transformInline} from './replacer.js'; - -export const replacerSpec = { - album: { - find: 'album', - link: 'album', - }, - 'album-commentary': { - find: 'album', - link: 'albumCommentary', - }, - 'album-gallery': { - find: 'album', - link: 'albumGallery', - }, - artist: { - find: 'artist', - link: 'artist', - }, - 'artist-gallery': { - find: 'artist', - link: 'artistGallery', - }, - 'commentary-index': { - find: null, - link: 'commentaryIndex', - }, - date: { - find: null, - value: (ref) => new Date(ref), - html: (date, {html, language}) => - html.tag('time', - {datetime: date.toString()}, - language.formatDate(date)), - }, - 'flash-index': { - find: null, - link: 'flashIndex', - }, - flash: { - find: 'flash', - link: 'flash', - transformName(name, node, input) { - const nextCharacter = input[node.iEnd]; - const lastCharacter = name[name.length - 1]; - if (![' ', '\n', '<'].includes(nextCharacter) && lastCharacter === '.') { - return name.slice(0, -1); - } else { - return name; - } - }, - }, - group: { - find: 'group', - link: 'groupInfo', - }, - 'group-gallery': { - find: 'group', - link: 'groupGallery', - }, - home: { - find: null, - link: 'home', - }, - 'listing-index': { - find: null, - link: 'listingIndex', - }, - listing: { - find: 'listing', - link: 'listing', - }, - media: { - find: null, - link: 'media', - }, - 'news-index': { - find: null, - link: 'newsIndex', - }, - 'news-entry': { - find: 'newsEntry', - link: 'newsEntry', - }, - root: { - find: null, - link: 'root', - }, - site: { - find: null, - link: 'site', - }, - static: { - find: 'staticPage', - link: 'staticPage', - }, - string: { - find: null, - value: (ref) => ref, - html: (ref, {language, args}) => language.$(ref, args), - }, - tag: { - find: 'artTag', - link: 'tag', - }, - track: { - find: 'track', - link: 'track', - }, -}; - -function splitLines(text) { - return text.split(/\r\n|\r|\n/); -} - -function joinLineBreaks(sourceLines) { - const outLines = []; - - let lineSoFar = ''; - for (let i = 0; i < sourceLines.length; i++) { - const line = sourceLines[i]; - lineSoFar += line; - if (!line.endsWith('<br>')) { - outLines.push(lineSoFar); - lineSoFar = ''; - } - } - - if (lineSoFar) { - outLines.push(lineSoFar); - } - - return outLines; -} - -function parseAttributes(string, {to}) { - const attributes = Object.create(null); - const skipWhitespace = (i) => { - const ws = /\s/; - if (ws.test(string[i])) { - const match = string.slice(i).match(/[^\s]/); - if (match) { - return i + match.index; - } else { - return string.length; - } - } else { - return i; - } - }; - - for (let i = 0; i < string.length; ) { - i = skipWhitespace(i); - const aStart = i; - const aEnd = i + string.slice(i).match(/[\s=]|$/).index; - const attribute = string.slice(aStart, aEnd); - i = skipWhitespace(aEnd); - if (string[i] === '=') { - i = skipWhitespace(i + 1); - let end, endOffset; - if (string[i] === '"' || string[i] === "'") { - end = string[i]; - endOffset = 1; - i++; - } else { - end = '\\s'; - endOffset = 0; - } - const vStart = i; - const vEnd = i + string.slice(i).match(new RegExp(`${end}|$`)).index; - const value = string.slice(vStart, vEnd); - i = vEnd + endOffset; - if (attribute === 'src' && value.startsWith('media/')) { - attributes[attribute] = to('media.path', value.slice('media/'.length)); - } else { - attributes[attribute] = value; - } - } else { - attributes[attribute] = attribute; - } - } - return Object.fromEntries( - Object.entries(attributes).map(([key, val]) => [ - key, - val === 'true' - ? true - : val === 'false' - ? false - : val === key - ? true - : val, - ]) - ); -} - -function unbound_transformMultiline(text, { - img, - to, - transformInline, - - thumb = null, -}) { - // Heck yes, HTML magics. - - text = transformInline(text.trim()); - - const outLines = []; - - const indentString = ' '.repeat(4); - - let levelIndents = []; - const openLevel = (indent) => { - // opening a sublist is a pain: to be semantically *and* visually - // correct, we have to append the <ul> at the end of the existing - // previous <li> - const previousLine = outLines[outLines.length - 1]; - if (previousLine?.endsWith('</li>')) { - // we will re-close the <li> later - outLines[outLines.length - 1] = previousLine.slice(0, -5) + ' <ul>'; - } else { - // if the previous line isn't a list item, this is the opening of - // the first list level, so no need for indent - outLines.push('<ul>'); - } - levelIndents.push(indent); - }; - const closeLevel = () => { - levelIndents.pop(); - if (levelIndents.length) { - // closing a sublist, so close the list item containing it too - outLines.push(indentString.repeat(levelIndents.length) + '</ul></li>'); - } else { - // closing the final list level! no need for indent here - outLines.push('</ul>'); - } - }; - - // okay yes we should support nested formatting, more than one blockquote - // layer, etc, but hear me out here: making all that work would basically - // be the same as implementing an entire markdown converter, which im not - // interested in doing lol. sorry!!! - let inBlockquote = false; - - let lines = splitLines(text); - lines = joinLineBreaks(lines); - for (let line of lines) { - const imageLine = line.startsWith('<img'); - line = line.replace(/<img (.*?)>/g, (match, attributes) => - img({ - lazy: true, - link: true, - thumb, - ...parseAttributes(attributes, {to}), - }) - ); - - let indentThisLine = 0; - let lineContent = line; - let lineTag = 'p'; - - const listMatch = line.match(/^( *)- *(.*)$/); - if (listMatch) { - // is a list item! - if (!levelIndents.length) { - // first level is always indent = 0, regardless of actual line - // content (this is to avoid going to a lesser indent than the - // initial level) - openLevel(0); - } else { - // find level corresponding to indent - const indent = listMatch[1].length; - let i; - for (i = levelIndents.length - 1; i >= 0; i--) { - if (levelIndents[i] <= indent) break; - } - // note: i cannot equal -1 because the first indentation level - // is always 0, and the minimum indentation is also 0 - if (levelIndents[i] === indent) { - // same indent! return to that level - while (levelIndents.length - 1 > i) closeLevel(); - // (if this is already the current level, the above loop - // will do nothing) - } else if (levelIndents[i] < indent) { - // lesser indent! branch based on index - if (i === levelIndents.length - 1) { - // top level is lesser: add a new level - openLevel(indent); - } else { - // lower level is lesser: return to that level - while (levelIndents.length - 1 > i) closeLevel(); - } - } - } - // finally, set variables for appending content line - indentThisLine = levelIndents.length; - lineContent = listMatch[2]; - lineTag = 'li'; - } else { - // not a list item! close any existing list levels - while (levelIndents.length) closeLevel(); - - // like i said, no nested shenanigans - quotes only appear outside - // of lists. sorry! - const quoteMatch = line.match(/^> *(.*)$/); - if (quoteMatch) { - // is a quote! open a blockquote tag if it doesnt already exist - if (!inBlockquote) { - inBlockquote = true; - outLines.push('<blockquote>'); - } - indentThisLine = 1; - lineContent = quoteMatch[1]; - } else if (inBlockquote) { - // not a quote! close a blockquote tag if it exists - inBlockquote = false; - outLines.push('</blockquote>'); - } - - // let some escaped symbols display as the normal symbol, since the - // point of escaping them is just to avoid having them be treated as - // syntax markers! - if (lineContent.match(/( *)\\-/)) { - lineContent = lineContent.replace('\\-', '-'); - } else if (lineContent.match(/( *)\\>/)) { - lineContent = lineContent.replace('\\>', '>'); - } - } - - if (lineTag === 'p') { - // certain inline element tags should still be postioned within a - // paragraph; other elements (e.g. headings) should be added as-is - const elementMatch = line.match(/^<(.*?)[ >]/); - if ( - elementMatch && - !imageLine && - ![ - 'a', - 'abbr', - 'b', - 'bdo', - 'br', - 'cite', - 'code', - 'data', - 'datalist', - 'del', - 'dfn', - 'em', - 'i', - 'img', - 'ins', - 'kbd', - 'mark', - 'output', - 'picture', - 'q', - 'ruby', - 'samp', - 'small', - 'span', - 'strong', - 'sub', - 'sup', - 'svg', - 'time', - 'var', - 'wbr', - ].includes(elementMatch[1]) - ) { - lineTag = ''; - } - - // for sticky headings! - if (elementMatch && elementMatch[1] === 'h2') { - lineContent = lineContent.replace(/<h2(.*?)>/g, (match, attributes) => { - const parsedAttributes = parseAttributes(attributes, {to}); - return `<h2 ${html.attributes({ - ...parsedAttributes, - class: [...parsedAttributes.class?.split(' ') ?? [], 'content-heading'], - })}>`; - }); - } - } - - let pushString = indentString.repeat(indentThisLine); - if (lineTag) { - pushString += `<${lineTag}>${lineContent}</${lineTag}>`; - } else { - pushString += lineContent; - } - outLines.push(pushString); - } - - // after processing all lines... - - // if still in a list, close all levels - while (levelIndents.length) closeLevel(); - - // if still in a blockquote, close its tag - if (inBlockquote) { - inBlockquote = false; - outLines.push('</blockquote>'); - } - - return outLines.join('\n'); -} - -function unbound_transformLyrics(text, { - transformInline, - transformMultiline, -}) { - // Different from transformMultiline 'cuz it joins multiple lines together - // with line 8reaks (<br>); transformMultiline treats each line as its own - // complete paragraph (or list, etc). - - // If it looks like old data, then like, oh god. - // Use the normal transformMultiline tool. - if (text.includes('<br')) { - return transformMultiline(text); - } - - text = transformInline(text.trim()); - - let buildLine = ''; - const addLine = () => outLines.push(`<p>${buildLine}</p>`); - const outLines = []; - for (const line of text.split('\n')) { - if (line.length) { - if (buildLine.length) { - buildLine += '<br>'; - } - buildLine += line; - } else if (buildLine.length) { - addLine(); - buildLine = ''; - } - } - if (buildLine.length) { - addLine(); - } - return outLines.join('\n'); -} - -export { - unbound_transformLyrics as transformLyrics, - unbound_transformMultiline as transformMultiline -} |