replacer: factor out postprocessHTMLTags

Only applied to img and video for now; h2 handling (postprocessHeadings) is still written from the ground up.
author: (quasar) nebula <qznebula@protonmail.com> 2025-03-24 15:18:15 -0300
committer: (quasar) nebula <qznebula@protonmail.com> 2025-03-24 15:18:15 -0300
commit: 10a87b70b41bf4c49615d7271adc8e3944c08719 (patch)
tree: f919a101ba1a0491004ca62116cf4eb48f38a02e
parent: 0ee5fb62a6c67548fccd4fc497e28cb03bcee06e (diff)
1 file changed, 59 insertions, 83 deletions
diff --git a/src/replacer.js b/src/replacer.js
index 07c38478..cbe6b587 100644
--- a/src/replacer.js
+++ b/src/replacer.js
@@ -518,23 +518,29 @@ export function postprocessComments(inputNodes) {
   return outputNodes;
 }
 
-export function postprocessImages(inputNodes) {
+function postprocessHTMLTags(inputNodes, tagName, callback) {
   const outputNodes = [];
 
-  let atStartOfLine = true;
-
   const lastNode = inputNodes.at(-1);
 
+  const regexp =
+    new RegExp(
+      `<${tagName} (.*?)>` +
+      (html.selfClosingTags.includes(tagName)
+        ? ''
+        : `(?:</${tagName}>)?`),
+      'g');
+
+  let atStartOfLine = true;
+
   for (const node of inputNodes) {
     if (node.type === 'tag') {
       atStartOfLine = false;
     }
 
     if (node.type === 'text') {
-      const imageRegexp = /<img (.*?)>/g;
-
       let match = null, parseFrom = 0;
-      while (match = imageRegexp.exec(node.data)) {
+      while (match = regexp.exec(node.data)) {
         const previousText = node.data.slice(parseFrom, match.index);
 
         outputNodes.push({
@@ -546,23 +552,19 @@ export function postprocessImages(inputNodes) {
 
         parseFrom = match.index + match[0].length;
 
-        const imageNode = {type: 'image'};
-        const attributes = html.parseAttributes(match[1]);
-
-        imageNode.src = attributes.get('src');
-
         if (previousText.endsWith('\n')) {
           atStartOfLine = true;
         } else if (previousText.length) {
           atStartOfLine = false;
         }
 
-        imageNode.inline = (() => {
-          // Images can force themselves to be rendered inline using a custom
-          // attribute - this style just works better for certain embeds,
-          // usually jokes or small images.
-          if (attributes.get('inline')) return true;
+        const attributes =
+          html.parseAttributes(match[1]);
 
+        const remainingTextInNode =
+          node.data.slice(parseFrom);
+
+        const inline = (() => {
           // If we've already determined we're in the middle of a line,
           // we're inline. (Of course!)
           if (!atStartOfLine) {
@@ -571,42 +573,27 @@ export function postprocessImages(inputNodes) {
 
           // If there's more text to go in this text node, and what's
           // remaining doesn't start with a line break, we're inline.
-          if (
-            parseFrom !== node.data.length &&
-            node.data[parseFrom] !== '\n'
-          ) {
+          if (remainingTextInNode && remainingTextInNode[0] !== '\n') {
             return true;
           }
 
           // If we're at the end of this text node, but this text node
           // isn't the last node overall, we're inline.
-          if (
-            parseFrom === node.data.length &&
-            node !== lastNode
-          ) {
+          if (!remainingTextInNode && node !== lastNode) {
             return true;
           }
 
-          // If no other condition matches, this image is on its own line.
+          // If no other condition matches, this tag is on its own line.
           return false;
         })();
 
-        if (attributes.get('link')) imageNode.link = attributes.get('link');
-        if (attributes.get('style')) imageNode.style = attributes.get('style');
-        if (attributes.get('width')) imageNode.width = parseInt(attributes.get('width'));
-        if (attributes.get('height')) imageNode.height = parseInt(attributes.get('height'));
-        if (attributes.get('align')) imageNode.align = attributes.get('align');
-        if (attributes.get('pixelate')) imageNode.pixelate = true;
+        outputNodes.push(
+          callback(attributes, {
+            inline,
+          }));
 
-        if (attributes.get('warning')) {
-          imageNode.warnings =
-            attributes.get('warning').split(', ');
-        }
-
-        outputNodes.push(imageNode);
-
-        // No longer at the start of a line after an image - there will at
-        // least be a text node with only '\n' before the next image that's
+        // No longer at the start of a line after the tag - there will at
+        // least be text with only '\n' before the next of this tag that's
         // on its own line.
         atStartOfLine = false;
       }
@@ -629,54 +616,43 @@ export function postprocessImages(inputNodes) {
   return outputNodes;
 }
 
-export function postprocessVideos(inputNodes) {
-  const outputNodes = [];
-
-  for (const node of inputNodes) {
-    if (node.type !== 'text') {
-      outputNodes.push(node);
-      continue;
-    }
-
-    const videoRegexp = /<video (.*?)>(<\/video>)?/g;
-
-    let match = null, parseFrom = 0;
-    while (match = videoRegexp.exec(node.data)) {
-      const previousText = node.data.slice(parseFrom, match.index);
-
-      outputNodes.push({
-        type: 'text',
-        data: previousText,
-        i: node.i + parseFrom,
-        iEnd: node.i + parseFrom + match.index,
-      });
-
-      parseFrom = match.index + match[0].length;
-
-      const videoNode = {type: 'video'};
-      const attributes = html.parseAttributes(match[1]);
+export function postprocessImages(inputNodes) {
+  return postprocessHTMLTags(inputNodes, 'img',
+    (attributes, {inline}) => {
+      const node = {type: 'image'};
+
+      node.src = attributes.get('src');
+      node.inline = attributes.get('inline') ?? inline;
+
+      if (attributes.get('link')) node.link = attributes.get('link');
+      if (attributes.get('style')) node.style = attributes.get('style');
+      if (attributes.get('width')) node.width = parseInt(attributes.get('width'));
+      if (attributes.get('height')) node.height = parseInt(attributes.get('height'));
+      if (attributes.get('align')) node.align = attributes.get('align');
+      if (attributes.get('pixelate')) node.pixelate = true;
+
+      if (attributes.get('warning')) {
+        node.warnings =
+          attributes.get('warning').split(', ');
+      }
 
-      videoNode.src = attributes.get('src');
+      return node;
+    });
+}
 
-      if (attributes.get('width')) videoNode.width = parseInt(attributes.get('width'));
-      if (attributes.get('height')) videoNode.height = parseInt(attributes.get('height'));
-      if (attributes.get('align')) videoNode.align = attributes.get('align');
-      if (attributes.get('pixelate')) videoNode.pixelate = true;
+export function postprocessVideos(inputNodes) {
+  return postprocessHTMLTags(inputNodes, 'video',
+    attributes => {
+      const node = {type: 'video'};
 
-      outputNodes.push(videoNode);
-    }
+      node.src = attributes.get('src');
 
-    if (parseFrom !== node.data.length) {
-      outputNodes.push({
-        type: 'text',
-        data: node.data.slice(parseFrom),
-        i: node.i + parseFrom,
-        iEnd: node.iEnd,
-      });
-    }
-  }
+      if (attributes.get('width')) node.width = parseInt(attributes.get('width'));
+      if (attributes.get('height')) node.height = parseInt(attributes.get('height'));
+      if (attributes.get('pixelate')) node.pixelate = true;
 
-  return outputNodes;
+      return node;
+    });
 }
 
 export function postprocessHeadings(inputNodes) {
author	(quasar) nebula <qznebula@protonmail.com>	2025-03-24 15:18:15 -0300
committer	(quasar) nebula <qznebula@protonmail.com>	2025-03-24 15:18:15 -0300
commit	10a87b70b41bf4c49615d7271adc8e3944c08719 (patch)
tree	f919a101ba1a0491004ca62116cf4eb48f38a02e
parent	0ee5fb62a6c67548fccd4fc497e28cb03bcee06e (diff)