« get me outta code hell

hsmusic-wiki - HSMusic - static wiki software cataloguing collaborative creation
about summary refs log tree commit diff
path: root/src/replacer.js
diff options
context:
space:
mode:
Diffstat (limited to 'src/replacer.js')
-rw-r--r-- src/replacer.js 464
1 files changed, 311 insertions, 153 deletions
diff --git a/src/replacer.js b/src/replacer.js
index 5378db1d..779ee78d 100644
--- a/src/replacer.js
+++ b/src/replacer.js
@@ -8,7 +8,8 @@
 import * as marked from 'marked';
 
 import * as html from '#html';
-import {escapeRegex, typeAppearance} from '#sugar';
+import {empty, escapeRegex, typeAppearance} from '#sugar';
+import {matchMarkdownLinks} from '#wiki-data';
 
 export const replacerSpec = {
   'album': {
@@ -152,7 +153,12 @@ export const replacerSpec = {
 
   'tag': {
     find: 'artTag',
-    link: 'linkArtTag',
+    link: 'linkArtTagDynamically',
+  },
+
+  'tag-info': {
+    find: 'artTag',
+    link: 'linkArtTagInfo',
   },
 
   'track': {
@@ -169,6 +175,11 @@ export const replacerSpec = {
     find: 'trackWithArtwork',
     link: 'linkTrackReferencingArtworks',
   },
+
+  'tooltip': {
+    value: (ref) => ref,
+    link: null,
+  }
 };
 
 // Syntax literals.
@@ -453,7 +464,7 @@ export function squashBackslashes(text) {
   // a set of characters where the backslash carries meaning
   // into later formatting (i.e. markdown). Note that we do
   // NOT compress double backslashes into single backslashes.
-  return text.replace(/([^\\](?:\\{2})*)\\(?![\\*_~>-])/g, '$1');
+  return text.replace(/([^\\](?:\\{2})*)\\(?![\\*_~>.-])/g, '$1');
 }
 
 export function restoreRawHTMLTags(text) {
@@ -513,23 +524,30 @@ export function postprocessComments(inputNodes) {
   return outputNodes;
 }
 
-export function postprocessImages(inputNodes) {
+function postprocessHTMLTags(inputNodes, tagName, callback) {
   const outputNodes = [];
-
-  let atStartOfLine = true;
+  const errors = [];
 
   const lastNode = inputNodes.at(-1);
 
+  const regexp =
+    new RegExp(
+      `<${tagName} (.*?)>` +
+      (html.selfClosingTags.includes(tagName)
+        ? ''
+        : `(?:</${tagName}>)?`),
+      'g');
+
+  let atStartOfLine = true;
+
   for (const node of inputNodes) {
     if (node.type === 'tag') {
       atStartOfLine = false;
     }
 
     if (node.type === 'text') {
-      const imageRegexp = /<img (.*?)>/g;
-
       let match = null, parseFrom = 0;
-      while (match = imageRegexp.exec(node.data)) {
+      while (match = regexp.exec(node.data)) {
         const previousText = node.data.slice(parseFrom, match.index);
 
         outputNodes.push({
@@ -541,23 +559,19 @@ export function postprocessImages(inputNodes) {
 
         parseFrom = match.index + match[0].length;
 
-        const imageNode = {type: 'image'};
-        const attributes = html.parseAttributes(match[1]);
-
-        imageNode.src = attributes.get('src');
-
         if (previousText.endsWith('\n')) {
           atStartOfLine = true;
         } else if (previousText.length) {
           atStartOfLine = false;
         }
 
-        imageNode.inline = (() => {
-          // Images can force themselves to be rendered inline using a custom
-          // attribute - this style just works better for certain embeds,
-          // usually jokes or small images.
-          if (attributes.get('inline')) return true;
+        const attributes =
+          html.parseAttributes(match[1]);
 
+        const remainingTextInNode =
+          node.data.slice(parseFrom);
+
+        const inline = (() => {
           // If we've already determined we're in the middle of a line,
           // we're inline. (Of course!)
           if (!atStartOfLine) {
@@ -566,42 +580,33 @@ export function postprocessImages(inputNodes) {
 
           // If there's more text to go in this text node, and what's
           // remaining doesn't start with a line break, we're inline.
-          if (
-            parseFrom !== node.data.length &&
-            node.data[parseFrom] !== '\n'
-          ) {
+          if (remainingTextInNode && remainingTextInNode[0] !== '\n') {
             return true;
           }
 
           // If we're at the end of this text node, but this text node
           // isn't the last node overall, we're inline.
-          if (
-            parseFrom === node.data.length &&
-            node !== lastNode
-          ) {
+          if (!remainingTextInNode && node !== lastNode) {
             return true;
           }
 
-          // If no other condition matches, this image is on its own line.
+          // If no other condition matches, this tag is on its own line.
           return false;
         })();
 
-        if (attributes.get('link')) imageNode.link = attributes.get('link');
-        if (attributes.get('style')) imageNode.style = attributes.get('style');
-        if (attributes.get('width')) imageNode.width = parseInt(attributes.get('width'));
-        if (attributes.get('height')) imageNode.height = parseInt(attributes.get('height'));
-        if (attributes.get('align')) imageNode.align = attributes.get('align');
-        if (attributes.get('pixelate')) imageNode.pixelate = true;
-
-        if (attributes.get('warning')) {
-          imageNode.warnings =
-            attributes.get('warning').split(', ');
+        try {
+          outputNodes.push(
+            callback(attributes, {
+              inline,
+            }));
+        } catch (caughtError) {
+          errors.push(new Error(
+            `Failed to process ${match[0]}`,
+            {cause: caughtError}));
         }
 
-        outputNodes.push(imageNode);
-
-        // No longer at the start of a line after an image - there will at
-        // least be a text node with only '\n' before the next image that's
+        // No longer at the start of a line after the tag - there will at
+        // least be text with only '\n' before the next of this tag that's
         // on its own line.
         atStartOfLine = false;
       }
@@ -621,57 +626,85 @@ export function postprocessImages(inputNodes) {
     outputNodes.push(node);
   }
 
+  if (!empty(errors)) {
+    throw new AggregateError(
+      errors,
+    `Errors postprocessing <${tagName}> tags`);
+  }
+
   return outputNodes;
 }
 
-export function postprocessVideos(inputNodes) {
-  const outputNodes = [];
+function complainAboutMediaSrc(src) {
+  if (!src) {
+    throw new Error(`Missing "src" attribute`);
+  }
 
-  for (const node of inputNodes) {
-    if (node.type !== 'text') {
-      outputNodes.push(node);
-      continue;
-    }
+  if (src.startsWith('/media/')) {
+    throw new Error(`Start "src" with "media/", not "/media/"`);
+  }
+}
 
-    const videoRegexp = /<video (.*?)>(<\/video>)?/g;
+export function postprocessImages(inputNodes) {
+  return postprocessHTMLTags(inputNodes, 'img',
+    (attributes, {inline}) => {
+      const node = {type: 'image'};
 
-    let match = null, parseFrom = 0;
-    while (match = videoRegexp.exec(node.data)) {
-      const previousText = node.data.slice(parseFrom, match.index);
+      node.src = attributes.get('src');
+      complainAboutMediaSrc(node.src);
 
-      outputNodes.push({
-        type: 'text',
-        data: previousText,
-        i: node.i + parseFrom,
-        iEnd: node.i + parseFrom + match.index,
-      });
+      node.inline = attributes.get('inline') ?? inline;
 
-      parseFrom = match.index + match[0].length;
+      if (attributes.get('link')) node.link = attributes.get('link');
+      if (attributes.get('style')) node.style = attributes.get('style');
+      if (attributes.get('width')) node.width = parseInt(attributes.get('width'));
+      if (attributes.get('height')) node.height = parseInt(attributes.get('height'));
+      if (attributes.get('align')) node.align = attributes.get('align');
+      if (attributes.get('pixelate')) node.pixelate = true;
 
-      const videoNode = {type: 'video'};
-      const attributes = html.parseAttributes(match[1]);
+      if (attributes.get('warning')) {
+        node.warnings =
+          attributes.get('warning').split(', ');
+      }
 
-      videoNode.src = attributes.get('src');
+      return node;
+    });
+}
 
-      if (attributes.get('width')) videoNode.width = parseInt(attributes.get('width'));
-      if (attributes.get('height')) videoNode.height = parseInt(attributes.get('height'));
-      if (attributes.get('align')) videoNode.align = attributes.get('align');
-      if (attributes.get('pixelate')) videoNode.pixelate = true;
+export function postprocessVideos(inputNodes) {
+  return postprocessHTMLTags(inputNodes, 'video',
+    (attributes, {inline}) => {
+      const node = {type: 'video'};
 
-      outputNodes.push(videoNode);
-    }
+      node.src = attributes.get('src');
+      complainAboutMediaSrc(node.src);
 
-    if (parseFrom !== node.data.length) {
-      outputNodes.push({
-        type: 'text',
-        data: node.data.slice(parseFrom),
-        i: node.i + parseFrom,
-        iEnd: node.iEnd,
-      });
-    }
-  }
+      node.inline = attributes.get('inline') ?? inline;
 
-  return outputNodes;
+      if (attributes.get('width')) node.width = parseInt(attributes.get('width'));
+      if (attributes.get('height')) node.height = parseInt(attributes.get('height'));
+      if (attributes.get('align')) node.align = attributes.get('align');
+      if (attributes.get('pixelate')) node.pixelate = true;
+
+      return node;
+    });
+}
+
+export function postprocessAudios(inputNodes) {
+  return postprocessHTMLTags(inputNodes, 'audio',
+    (attributes, {inline}) => {
+      const node = {type: 'audio'};
+
+      node.src = attributes.get('src');
+      complainAboutMediaSrc(node.src);
+
+      node.inline = attributes.get('inline') ?? inline;
+
+      if (attributes.get('align')) node.align = attributes.get('align');
+      if (attributes.get('nameless')) node.nameless = true;
+
+      return node;
+    });
 }
 
 export function postprocessHeadings(inputNodes) {
@@ -769,109 +802,234 @@ export function postprocessExternalLinks(inputNodes) {
       continue;
     }
 
-    const plausibleLinkRegexp = /\[.*?\)/g;
-
-    let textContent = '';
+    let textNode = {
+      i: node.i,
+      iEnd: null,
+      type: 'text',
+      data: '',
+    };
 
-    let plausibleMatch = null, parseFrom = 0;
-    while (plausibleMatch = plausibleLinkRegexp.exec(node.data)) {
-      textContent += node.data.slice(parseFrom, plausibleMatch.index);
-
-      // Pedantic rules use more particular parentheses detection in link
-      // destinations - they allow one level of balanced parentheses, and
-      // otherwise, parentheses must be escaped. This allows for entire links
-      // to be wrapped in parentheses, e.g below:
-      //
-      //   This is so cool. ([You know??](https://example.com))
-      //
-      const definiteMatch =
-        marked.Lexer.rules.inline.pedantic.link
-          .exec(node.data.slice(plausibleMatch.index));
-
-      if (definiteMatch) {
-        const {1: label, 2: href} = definiteMatch;
-
-        // Split the containing text node into two - the second of these will
-        // be added after iterating over matches, or by the next match.
-        if (textContent.length) {
-          outputNodes.push({type: 'text', data: textContent});
-          textContent = '';
-        }
+    let parseFrom = 0;
+    for (const match of matchMarkdownLinks(node.data, {marked})) {
+      const {label, href, index, length} = match;
 
-        const offset = plausibleMatch.index + definiteMatch.index;
-        const length = definiteMatch[0].length;
+      textNode.data += node.data.slice(parseFrom, index);
 
-        outputNodes.push({
-          i: node.i + offset,
-          iEnd: node.i + offset + length,
-          type: 'external-link',
-          data: {label, href},
-        });
+      // Split the containing text node into two - the second of these will
+      // be filled in and pushed by the next match, or after iterating over
+      // all matches.
+      if (textNode.data) {
+        textNode.iEnd = textNode.i + textNode.data.length;
+        outputNodes.push(textNode);
 
-        parseFrom = offset + length;
-      } else {
-        parseFrom = plausibleMatch.index;
+        textNode = {
+          i: node.i + index + length,
+          iEnd: null,
+          type: 'text',
+          data: '',
+        };
       }
+
+      outputNodes.push({
+        i: node.i + index,
+        iEnd: node.i + index + length,
+        type: 'external-link',
+        data: {label, href},
+      });
+
+      parseFrom = index + length;
     }
 
     if (parseFrom !== node.data.length) {
-      textContent += node.data.slice(parseFrom);
+      textNode.data += node.data.slice(parseFrom);
+      textNode.iEnd = node.iEnd;
     }
 
-    if (textContent.length) {
-      outputNodes.push({type: 'text', data: textContent});
+    if (textNode.data) {
+      outputNodes.push(textNode);
     }
   }
 
   return outputNodes;
 }
 
-export function parseInput(input) {
+export function parseContentNodes(input, {
+  errorMode = 'throw',
+} = {}) {
   if (typeof input !== 'string') {
     throw new TypeError(`Expected input to be string, got ${typeAppearance(input)}`);
   }
 
-  try {
-    let output = parseNodes(input, 0);
-    output = postprocessComments(output);
-    output = postprocessImages(output);
-    output = postprocessVideos(output);
-    output = postprocessHeadings(output);
-    output = postprocessSummaries(output);
-    output = postprocessExternalLinks(output);
-    return output;
-  } catch (errorNode) {
-    if (errorNode.type !== 'error') {
-      throw errorNode;
+  let result = null, error = null;
+
+  process: {
+    try {
+      result = parseNodes(input, 0);
+    } catch (caughtError) {
+      if (caughtError.type === 'error') {
+        const {i, data: {message}} = caughtError;
+
+        let lineStart = input.slice(0, i).lastIndexOf('\n');
+        if (lineStart >= 0) {
+          lineStart += 1;
+        } else {
+          lineStart = 0;
+        }
+
+        let lineEnd = input.slice(i).indexOf('\n');
+        if (lineEnd >= 0) {
+          lineEnd += i;
+        } else {
+          lineEnd = input.length;
+        }
+
+        const line = input.slice(lineStart, lineEnd);
+
+        const cursor = i - lineStart;
+
+        error =
+          new SyntaxError(
+            `Parse error (at pos ${i}): ${message}\n` +
+            line + `\n` +
+            '-'.repeat(cursor) + '^');
+      } else {
+        error = caughtError;
+      }
+
+      // A parse error means there's no output to continue with at all,
+      // so stop here.
+      break process;
     }
 
-    const {
-      i,
-      data: {message},
-    } = errorNode;
+    const postprocessErrors = [];
+
+    for (const postprocess of [
+      postprocessComments,
+      postprocessImages,
+      postprocessVideos,
+      postprocessAudios,
+      postprocessHeadings,
+      postprocessSummaries,
+      postprocessExternalLinks,
+    ]) {
+      try {
+        result = postprocess(result);
+      } catch (caughtError) {
+        const error =
+          new Error(
+            `Error in step ${`"${postprocess.name}"`}`,
+            {cause: caughtError});
+
+        error[Symbol.for('hsmusic.aggregate.translucent')] = true;
+
+        postprocessErrors.push(error);
+      }
+    }
 
-    let lineStart = input.slice(0, i).lastIndexOf('\n');
-    if (lineStart >= 0) {
-      lineStart += 1;
-    } else {
-      lineStart = 0;
+    if (!empty(postprocessErrors)) {
+      error =
+        new AggregateError(
+          postprocessErrors,
+        `Errors postprocessing content text`);
+
+      error[Symbol.for('hsmusic.aggregate.translucent')] = 'single';
     }
+  }
 
-    let lineEnd = input.slice(i).indexOf('\n');
-    if (lineEnd >= 0) {
-      lineEnd += i;
+  if (errorMode === 'throw') {
+    if (error) {
+      throw error;
     } else {
-      lineEnd = input.length;
+      return result;
     }
+  } else if (errorMode === 'return') {
+    if (!result) {
+      result = [{
+        i: 0,
+        iEnd: input.length,
+        type: 'text',
+        data: input,
+      }];
+    }
+
+    return {error, result};
+  } else {
+    throw new Error(`Unknown errorMode ${errorMode}`);
+  }
+}
 
-    const line = input.slice(lineStart, lineEnd);
+export function* splitContentNodesAround(nodes, splitter) {
+  if (splitter instanceof RegExp) {
+    const regex = splitter;
 
-    const cursor = i - lineStart;
+    splitter = function*(text) {
+      for (const match of text.matchAll(regex)) {
+        yield {
+          index: match.index,
+          length: match[0].length,
+        };
+      }
+    };
+  }
 
-    throw new SyntaxError([
-      `Parse error (at pos ${i}): ${message}`,
-      line,
-      '-'.repeat(cursor) + '^',
-    ].join('\n'));
+  if (typeof splitter === 'string') {
+    throw new TypeError(`Expected generator or regular expression`);
+  }
+
+  function* splitTextNode(node) {
+    let textNode = {
+      i: node.i,
+      iEnd: null,
+      type: 'text',
+      data: '',
+    };
+
+    let parseFrom = 0;
+    for (const match of splitter(node.data)) {
+      const {index, length} = match;
+
+      textNode.data += node.data.slice(parseFrom, index);
+
+      if (textNode.data) {
+        textNode.iEnd = textNode.i + textNode.data.length;
+        yield textNode;
+      }
+
+      yield {
+        i: node.i + index,
+        iEnd: node.i + index + length,
+        type: 'separator',
+        data: {
+          text: node.data.slice(index, index + length),
+          match,
+        },
+      };
+
+      textNode = {
+        i: node.i + index + length,
+        iEnd: null,
+        type: 'text',
+        data: '',
+      };
+
+      parseFrom = index + length;
+    }
+
+    if (parseFrom !== node.data.length) {
+      textNode.data += node.data.slice(parseFrom);
+      textNode.iEnd = node.iEnd;
+    }
+
+    if (textNode.data) {
+      yield textNode;
+    }
+  }
+
+  for (const node of nodes) {
+    if (node.type === 'text') {
+      yield* splitTextNode(node);
+    } else {
+      yield node;
+    }
   }
 }