« get me outta code hell

hsmusic-wiki - HSMusic - static wiki software cataloguing collaborative creation
about summary refs log tree commit diff
path: root/src/util/replacer.js
diff options
context:
space:
mode:
Diffstat (limited to 'src/util/replacer.js')
-rw-r--r--src/util/replacer.js549
1 files changed, 400 insertions, 149 deletions
diff --git a/src/util/replacer.js b/src/util/replacer.js
index 9d602ca..d1b0a26 100644
--- a/src/util/replacer.js
+++ b/src/util/replacer.js
@@ -1,22 +1,150 @@
-import {logError, logWarn} from './cli.js';
-import {escapeRegex} from './sugar.js';
-
-export function validateReplacerSpec(replacerSpec, {find, link}) {
-  let success = true;
-
-  for (const [key, {link: linkKey, find: findKey, html}] of Object.entries(replacerSpec)) {
-    if (!html && !link[linkKey]) {
-      logError`The replacer spec ${key} has invalid link key ${linkKey}! Specify it in link specs or fix typo.`;
-      success = false;
-    }
-    if (findKey && !find[findKey]) {
-      logError`The replacer spec ${key} has invalid find key ${findKey}! Specify it in find specs or fix typo.`;
-      success = false;
-    }
-  }
-
-  return success;
-}
+// Regex-based forward parser for wiki content, breaking up text input into
+// text and (possibly nested) tag nodes.
+//
+// The behavior here is closely tied to the `transformContent` content
+// function, which converts nodes parsed here into actual HTML, links, etc.,
+// for embedding in a wiki webpage.
+
+import * as marked from 'marked';
+
+import * as html from '#html';
+import {escapeRegex, typeAppearance} from '#sugar';
+
+export const replacerSpec = { // One entry per replacer key; each has `find` plus either `link` or `value`/`html`.
+  'album': {
+    find: 'album',
+    link: 'linkAlbum',
+  },
+
+  'album-commentary': {
+    find: 'album',
+    link: 'linkAlbumCommentary',
+  },
+
+  'album-gallery': {
+    find: 'album',
+    link: 'linkAlbumGallery',
+  },
+
+  'artist': {
+    find: 'artist',
+    link: 'linkArtist',
+  },
+
+  'artist-gallery': {
+    find: 'artist',
+    link: 'linkArtistGallery',
+  },
+
+  'commentary-index': {
+    find: null,
+    link: 'linkCommentaryIndex',
+  },
+
+  'date': {
+    find: null,
+    value: (ref) => new Date(ref), // Parses the reference text with the Date constructor.
+    html: (date, {html, language}) => // Builds a <time> tag around the formatted date.
+      html.tag('time',
+        {datetime: date.toUTCString()}, // NOTE(review): toUTCString() output isn't a valid HTML datetime value - consider toISOString().
+        language.formatDate(date)),
+  },
+
+  'flash-index': {
+    find: null,
+    link: 'linkFlashIndex',
+  },
+
+  'flash': {
+    find: 'flash',
+    link: 'linkFlash',
+    transformName(name, node, input) { // Drops the name's trailing '.' when input[node.iEnd] isn't ' ', '\n', or '<'.
+      const nextCharacter = input[node.iEnd]; // undefined when node.iEnd is at the end of input
+      const lastCharacter = name[name.length - 1];
+      if (![' ', '\n', '<'].includes(nextCharacter) && lastCharacter === '.') {
+        return name.slice(0, -1);
+      } else {
+        return name;
+      }
+    },
+  },
+
+  'flash-act': {
+    find: 'flashAct',
+    link: 'linkFlashAct',
+  },
+
+  'group': {
+    find: 'group',
+    link: 'linkGroup',
+  },
+
+  'group-gallery': {
+    find: 'group',
+    link: 'linkGroupGallery',
+  },
+
+  'home': {
+    find: null,
+    link: 'linkWikiHome',
+  },
+
+  'listing-index': {
+    find: null,
+    link: 'linkListingIndex',
+  },
+
+  'listing': {
+    find: 'listing',
+    link: 'linkListing',
+  },
+
+  'media': {
+    find: null,
+    link: 'linkPathFromMedia',
+  },
+
+  'news-index': {
+    find: null,
+    link: 'linkNewsIndex',
+  },
+
+  'news-entry': {
+    find: 'newsEntry',
+    link: 'linkNewsEntry',
+  },
+
+  'root': {
+    find: null,
+    link: 'linkPathFromRoot',
+  },
+
+  'site': {
+    find: null,
+    link: 'linkPathFromSite',
+  },
+
+  'static': {
+    find: 'staticPage',
+    link: 'linkStaticPage',
+  },
+
+  'string': {
+    find: null,
+    value: (ref) => ref, // The reference text is used as-is.
+    html: (ref, {language, args}) => language.$(ref, args),
+  },
+
+  'tag': {
+    find: 'artTag',
+    link: 'linkArtTag',
+  },
+
+  'track': {
+    find: 'track',
+    link: 'linkTrackDynamically',
+  },
+};
 
 // Syntax literals.
 const tagBeginning = '[[';
@@ -75,6 +203,8 @@ function parseNodes(input, i, stopAt, textOnly) {
       string = string.trimEnd();
     }
 
+    string = cleanRawText(string);
+
     if (string.length) {
       nodes.push({i: iString, iEnd: i, type: 'text', data: string});
       string = '';
@@ -221,11 +351,10 @@ function parseNodes(input, i, stopAt, textOnly) {
       let hash;
 
       if (stop_literal === tagHash) {
-        N = parseNodes(input, i, [R_tagArgument, R_tagLabel, R_tagEnding]);
+        N = parseOneTextNode(input, i, [R_tagArgument, R_tagLabel, R_tagEnding]);
 
         if (!stopped) throw endOfInput(i, `reading hash`);
-
-        if (!N) throw makeError(i, `Expected content (hash).`);
+        if (!N) throw makeError(i, `Expected text (hash).`);
 
         hash = N;
         i = stop_iParse;
@@ -293,9 +422,257 @@ function parseNodes(input, i, stopAt, textOnly) {
   return nodes;
 }
 
+export function squashBackslashes(text) {
+  // Squash backslashes which aren't themselves escaped into
+  // the following character, unless that character is one of
+  // a set of characters where the backslash carries meaning
+  // into later formatting (i.e. markdown). Note that we do
+  // NOT compress double backslashes into single backslashes.
+  return text.replace(/([^\\](?:\\{2})*)\\(?![\\*_-])/g, '$1');
+}
+
+export function restoreRawHTMLTags(text) {
+  // Replace stuff like <html:a> with <a>; these signal that
+  // the tag shouldn't be processed by the replacer system,
+  // and should just be embedded into the content as raw HTML.
+  return text.replace(/<html:(.*?)(?=[ >])/g, '<$1');
+}
+
+export function cleanRawText(text) {
+  text = squashBackslashes(text);
+  text = restoreRawHTMLTags(text);
+  return text;
+}
+
+export function postprocessImages(inputNodes) {
+  const outputNodes = [];
+
+  let atStartOfLine = true;
+
+  const lastNode = inputNodes.at(-1);
+
+  for (const node of inputNodes) {
+    if (node.type === 'tag') {
+      atStartOfLine = false;
+    }
+
+    if (node.type === 'text') {
+      const imageRegexp = /<img (.*?)>/g;
+
+      let match = null, parseFrom = 0;
+      while (match = imageRegexp.exec(node.data)) {
+        const previousText = node.data.slice(parseFrom, match.index);
+
+        outputNodes.push({
+          type: 'text',
+          data: previousText,
+          i: node.i + parseFrom,
+          iEnd: node.i + parseFrom + match.index,
+        });
+
+        parseFrom = match.index + match[0].length;
+
+        const imageNode = {type: 'image'};
+        const attributes = html.parseAttributes(match[1]);
+
+        imageNode.src = attributes.get('src');
+
+        if (previousText.endsWith('\n')) {
+          atStartOfLine = true;
+        } else if (previousText.length) {
+          atStartOfLine = false;
+        }
+
+        imageNode.inline = (() => {
+          // Images can force themselves to be rendered inline using a custom
+          // attribute - this style just works better for certain embeds,
+          // usually jokes or small images.
+          if (attributes.get('inline')) return true;
+
+          // If we've already determined we're in the middle of a line,
+          // we're inline. (Of course!)
+          if (!atStartOfLine) {
+            return true;
+          }
+
+          // If there's more text to go in this text node, and what's
+          // remaining doesn't start with a line break, we're inline.
+          if (
+            parseFrom !== node.data.length &&
+            node.data[parseFrom] !== '\n'
+          ) {
+            return true;
+          }
+
+          // If we're at the end of this text node, but this text node
+          // isn't the last node overall, we're inline.
+          if (
+            parseFrom === node.data.length &&
+            node !== lastNode
+          ) {
+            return true;
+          }
+
+          // If no other condition matches, this image is on its own line.
+          return false;
+        })();
+
+        if (attributes.get('link')) imageNode.link = attributes.get('link');
+        if (attributes.get('style')) imageNode.style = attributes.get('style');
+        if (attributes.get('width')) imageNode.width = parseInt(attributes.get('width'));
+        if (attributes.get('height')) imageNode.height = parseInt(attributes.get('height'));
+        if (attributes.get('align')) imageNode.align = attributes.get('align');
+        if (attributes.get('pixelate')) imageNode.pixelate = true;
+
+        if (attributes.get('warning')) {
+          imageNode.warnings =
+            attributes.get('warning').split(', ');
+        }
+
+        outputNodes.push(imageNode);
+
+        // No longer at the start of a line after an image - there will at
+        // least be a text node with only '\n' before the next image that's
+        // on its own line.
+        atStartOfLine = false;
+      }
+
+      if (parseFrom !== node.data.length) {
+        outputNodes.push({
+          type: 'text',
+          data: node.data.slice(parseFrom),
+          i: node.i + parseFrom,
+          iEnd: node.iEnd,
+        });
+      }
+
+      continue;
+    }
+
+    outputNodes.push(node);
+  }
+
+  return outputNodes;
+}
+
+export function postprocessHeadings(inputNodes) {
+  const outputNodes = [];
+
+  for (const node of inputNodes) {
+    if (node.type !== 'text') {
+      outputNodes.push(node);
+      continue;
+    }
+
+    const headingRegexp = /<h2 (.*?)>/g;
+
+    let textContent = '';
+
+    let match = null, parseFrom = 0;
+    while (match = headingRegexp.exec(node.data)) {
+      textContent += node.data.slice(parseFrom, match.index);
+      parseFrom = match.index + match[0].length;
+
+      const attributes = html.parseAttributes(match[1]);
+      attributes.push('class', 'content-heading');
+
+      // We're only modifying the opening tag here. The remaining content,
+      // including the closing tag, will be pushed as-is.
+      textContent += `<h2 ${attributes}>`;
+    }
+
+    if (parseFrom !== node.data.length) {
+      textContent += node.data.slice(parseFrom);
+    }
+
+    outputNodes.push({
+      type: 'text',
+      data: textContent,
+      i: node.i,
+      iEnd: node.iEnd,
+    });
+  }
+
+  return outputNodes;
+}
+
+export function postprocessExternalLinks(inputNodes) {
+  const outputNodes = [];
+
+  for (const node of inputNodes) {
+    if (node.type !== 'text') {
+      outputNodes.push(node);
+      continue;
+    }
+
+    const plausibleLinkRegexp = /\[.*?\)/g;
+
+    let textContent = '';
+
+    let plausibleMatch = null, parseFrom = 0;
+    while (plausibleMatch = plausibleLinkRegexp.exec(node.data)) {
+      textContent += node.data.slice(parseFrom, plausibleMatch.index);
+
+      // Pedantic rules use more particular parentheses detection in link
+      // destinations - they allow one level of balanced parentheses, and
+      // otherwise, parentheses must be escaped. This allows for entire links
+      // to be wrapped in parentheses, e.g below:
+      //
+      //   This is so cool. ([You know??](https://example.com))
+      //
+      const definiteMatch =
+        marked.Lexer.rules.inline.pedantic.link
+          .exec(node.data.slice(plausibleMatch.index));
+
+      if (definiteMatch) {
+        const {1: label, 2: href} = definiteMatch;
+
+        // Split the containing text node into two - the second of these will
+        // be added after iterating over matches, or by the next match.
+        if (textContent.length) {
+          outputNodes.push({type: 'text', data: textContent});
+          textContent = '';
+        }
+
+        const offset = plausibleMatch.index + definiteMatch.index;
+        const length = definiteMatch[0].length;
+
+        outputNodes.push({
+          i: node.i + offset,
+          iEnd: node.i + offset + length,
+          type: 'external-link',
+          data: {label, href},
+        });
+
+        parseFrom = offset + length;
+      } else {
+        parseFrom = plausibleMatch.index;
+      }
+    }
+
+    if (parseFrom !== node.data.length) {
+      textContent += node.data.slice(parseFrom);
+    }
+
+    if (textContent.length) {
+      outputNodes.push({type: 'text', data: textContent});
+    }
+  }
+
+  return outputNodes;
+}
+
 export function parseInput(input) {
+  if (typeof input !== 'string') {
+    throw new TypeError(`Expected input to be string, got ${typeAppearance(input)}`);
+  }
+
   try {
-    return parseNodes(input, 0);
+    let output = parseNodes(input, 0);
+    output = postprocessImages(output);
+    output = postprocessHeadings(output);
+    output = postprocessExternalLinks(output);
+    return output;
   } catch (errorNode) {
     if (errorNode.type !== 'error') {
       throw errorNode;
@@ -331,129 +708,3 @@ export function parseInput(input) {
     ].join('\n'));
   }
 }
-
-function evaluateTag(node, opts) {
-  const {find, input, language, link, replacerSpec, to} = opts;
-
-  const source = input.slice(node.i, node.iEnd);
-
-  const replacerKeyImplied = !node.data.replacerKey;
-  const replacerKey = replacerKeyImplied ? 'track' : node.data.replacerKey.data;
-
-  if (!replacerSpec[replacerKey]) {
-    logWarn`The link ${source} has an invalid replacer key!`;
-    return source;
-  }
-
-  const {
-    find: findKey,
-    link: linkKey,
-    value: valueFn,
-    html: htmlFn,
-    transformName,
-  } = replacerSpec[replacerKey];
-
-  const replacerValue = transformNodes(node.data.replacerValue, opts);
-
-  const value = valueFn
-    ? valueFn(replacerValue)
-    : findKey
-    ? find[findKey](
-        replacerKeyImplied ? replacerValue : replacerKey + `:` + replacerValue
-      )
-    : {
-        directory: replacerValue,
-        name: null,
-      };
-
-  if (!value) {
-    logWarn`The link ${source} does not match anything!`;
-    return source;
-  }
-
-  const enteredLabel = node.data.label && transformNode(node.data.label, opts);
-
-  const label =
-    enteredLabel ||
-    (transformName && transformName(value.name, node, input)) ||
-    value.name;
-
-  if (!valueFn && !label) {
-    logWarn`The link ${source} requires a label be entered!`;
-    return source;
-  }
-
-  const hash = node.data.hash && transformNodes(node.data.hash, opts);
-
-  const args =
-    node.data.args &&
-    Object.fromEntries(
-      node.data.args.map(({key, value}) => [
-        transformNode(key, opts),
-        transformNodes(value, opts),
-      ])
-    );
-
-  const fn = htmlFn ? htmlFn : link[linkKey];
-
-  try {
-    return fn(value, {text: label, hash, args, language, to});
-  } catch (error) {
-    logError`The link ${source} failed to be processed: ${error}`;
-    return source;
-  }
-}
-
-function transformNode(node, opts) {
-  if (!node) {
-    throw new Error('Expected a node!');
-  }
-
-  if (Array.isArray(node)) {
-    throw new Error('Got an array - use transformNodes here!');
-  }
-
-  switch (node.type) {
-    case 'text':
-      return node.data;
-    case 'tag':
-      return evaluateTag(node, opts);
-    default:
-      throw new Error(`Unknown node type ${node.type}`);
-  }
-}
-
-function transformNodes(nodes, opts) {
-  if (!nodes || !Array.isArray(nodes)) {
-    throw new Error(`Expected an array of nodes! Got: ${nodes}`);
-  }
-
-  return nodes.map((node) => transformNode(node, opts)).join('');
-}
-
-export function transformInline(input, {
-  replacerSpec,
-  find,
-  language,
-  link,
-  to,
-  wikiData,
-}) {
-  if (!replacerSpec) throw new Error('Expected replacerSpec');
-  if (!find) throw new Error('Expected find');
-  if (!language) throw new Error('Expected language');
-  if (!link) throw new Error('Expected link');
-  if (!to) throw new Error('Expected to');
-  if (!wikiData) throw new Error('Expected wikiData');
-
-  const nodes = parseInput(input);
-  return transformNodes(nodes, {
-    input,
-    find,
-    link,
-    replacerSpec,
-    language,
-    to,
-    wikiData,
-  });
-}