« get me outta code hell

content: transformContent: gracefully merge marked output & tag nodes - hsmusic-wiki - HSMusic - static wiki software cataloguing collaborative creation
about summary refs log tree commit diff
diff options
context:
space:
mode:
author: (quasar) nebula <qznebula@protonmail.com> 2023-08-16 15:22:34 -0300
committer: (quasar) nebula <qznebula@protonmail.com> 2023-08-16 15:22:34 -0300
commit: 9d120f85fc50dd16a3b47efde25e02f7e9cc6e79 (patch)
tree: da67e64d37343d850b282d2ac9589b0128363bdf
parent: 3a866a0b3d452d4aeb77d8a1e63ba8d5a2913296 (diff)
content: transformContent: gracefully merge marked output & tag nodes
-rw-r--r-- src/content/dependencies/transformContent.js 124
-rw-r--r-- tap-snapshots/test/snapshot/transformContent.js.test.cjs 14
2 files changed, 91 insertions, 47 deletions
diff --git a/src/content/dependencies/transformContent.js b/src/content/dependencies/transformContent.js
index 68d7eaa4..3d93284d 100644
--- a/src/content/dependencies/transformContent.js
+++ b/src/content/dependencies/transformContent.js
@@ -346,6 +346,7 @@ export default {
             if (node.inline) {
               return {
                 type: 'image',
+                inline: true,
                 data:
                   html.tag('img', {src, width, height}),
               };
@@ -355,6 +356,7 @@ export default {
 
             return {
               type: 'image',
+              inline: false,
               data:
                 html.tag('div', {class: 'content-image'},
                   image.slots({
@@ -454,20 +456,84 @@ export default {
       mangle: false,
     };
 
+    // The content of non-text nodes can end up getting mangled by marked.
+    // To avoid this, we replace them with mundane placeholders, then
+    // reinsert the content in the correct positions. This also avoids
+    // having to stringify tag content within this generate() function.
+
+    const extractNonTextNodes = ({
+      getTextNodeContents = node => node.data,
+    } = {}) =>
+      contentFromNodes
+        .map((node, index) => {
+          if (node.type === 'text') {
+            return getTextNodeContents(node, index);
+          }
+
+          const attributes = html.attributes({
+            class: 'INSERT-NON-TEXT',
+            'data-type': node.type,
+          });
+
+          if (node.type === 'image') {
+            attributes.set('data-inline', node.inline);
+          }
+
+          return `<span ${attributes}>${index}</span>`;
+        })
+        .join('');
+
+    const reinsertNonTextNodes = (markedOutput) => {
+      markedOutput = markedOutput.trim();
+
+      const tags = [];
+      const regexp = /<span class="INSERT-NON-TEXT" (.*?)>([0-9]+?)<\/span>/g;
+
+      let deleteParagraph = false;
+
+      const addText = (text) => {
+        if (deleteParagraph) {
+          text = text.replace(/^<\/p>/, '');
+          deleteParagraph = false;
+        }
+
+        tags.push(text);
+      };
+
+      let match = null, parseFrom = 0;
+      while (match = regexp.exec(markedOutput)) {
+        addText(markedOutput.slice(parseFrom, match.index));
+        parseFrom = match.index + match[0].length;
+
+        const attributes = html.parseAttributes(match[1]);
+
+        // Images that were all on their own line need to be removed from
+        // the surrounding <p> tag that marked generates. The HTML parser
+        // treats a <div> that starts inside a <p> as a Crocker-class
+        // misgiving, and will treat you very badly if you feed it that.
+        if (attributes.get('data-type') === 'image') {
+          if (!attributes.get('data-inline')) {
+            tags[tags.length - 1] = tags[tags.length - 1].replace(/<p>$/, '');
+            deleteParagraph = true;
+          }
+        }
+
+        const nonTextNodeIndex = match[2];
+        tags.push(contentFromNodes[nonTextNodeIndex].data);
+      }
+
+      if (parseFrom !== markedOutput.length) {
+        addText(markedOutput.slice(parseFrom));
+      }
+
+      return html.tags(tags, {[html.joinChildren]: ''});
+    };
+
     // This is separated into its own function just since we're gonna reuse
     // it in a minute if everything goes to heck in lyrics mode.
     const transformMultiline = () => {
       const markedInput =
-        contentFromNodes
-          .map(node => {
-            if (node.type === 'text') {
-              return node.data;
-            } else {
-              return node.data.toString();
-            }
-          })
-          .join('')
-
+        extractNonTextNodes()
           // Compress multiple line breaks into single line breaks.
           .replace(/\n{2,}/g, '\n')
           // Expand line breaks which don't follow a list, quote,
@@ -479,22 +545,12 @@ export default {
           .replace(/(?<=^>.*)\n+(?!^>)/gm, '\n\n');
 
       const markedOutput =
-        marked.parse(markedInput, markedOptions)
-          // Images that were all on their own line need to be removed from
-          // the surrounding <p> tag that marked generates. The HTML parser
-          // treats a <div> that starts inside a <p> as a Crocker-class
-          // misgiving, and will treat you very badly if you feed it that.
-          .replace(
-            /^<p>(<a class="[^"]*?image-link.*?<\/a>)<\/p>$/gm,
-            (match, a) => a);
-
-      return markedOutput;
+        marked.parse(markedInput, markedOptions);
+
+      return reinsertNonTextNodes(markedOutput);
     }
 
     if (slots.mode === 'multiline') {
-      // Unfortunately, we kind of have to be super evil here and stringify
-      // the links, or else parse marked's output into html tags, which is
-      // very out of scope at the moment.
       return transformMultiline();
     }
 
@@ -514,15 +570,9 @@ export default {
         return transformMultiline();
       }
 
-      // Lyrics mode is also evil for the same stringifying reasons as
-      // multiline.
-      return marked.parse(
-        contentFromNodes
-          .map((node, index) => {
-            if (node.type !== 'text') {
-              return node.data.toString();
-            }
-
+      const markedInput =
+        extractNonTextNodes({
+          getTextNodeContents(node, index) {
             // First, replace line breaks that follow text content with
             // <br> tags.
             let content = node.data.replace(/(?!^)\n/gm, '<br>\n');
@@ -541,9 +591,13 @@ export default {
             }
 
             return content;
-          })
-          .join(''),
-        markedOptions);
+          },
+        });
+
+      const markedOutput =
+        marked.parse(markedInput, markedOptions);
+
+      return reinsertNonTextNodes(markedOutput);
     }
   },
 }
diff --git a/tap-snapshots/test/snapshot/transformContent.js.test.cjs b/tap-snapshots/test/snapshot/transformContent.js.test.cjs
index dd859b2f..d144cf12 100644
--- a/tap-snapshots/test/snapshot/transformContent.js.test.cjs
+++ b/tap-snapshots/test/snapshot/transformContent.js.test.cjs
@@ -8,24 +8,21 @@
 exports[`test/snapshot/transformContent.js TAP transformContent (snapshot) > dates 1`] = `
 <p><time datetime="Thu, 13 Apr 2023 00:00:00 GMT">4/12/2023</time> Yep!</p>
 <p>Very nice: <time datetime="Fri, 25 Oct 2413 03:00:00 GMT">10/25/2413</time></p>
-
 `
 
 exports[`test/snapshot/transformContent.js TAP transformContent (snapshot) > inline images 1`] = `
 <p><img src="snooping.png"> as USUAL...</p>
 <p>What do you know? <img src="cowabunga.png" width="24" height="32"></p>
-<p><a href="to-localized.album/cool-album" style="--primary-color: #123456; --dim-color: #000000">I&#39;m on the left.</a><img src="im-on-the-right.jpg"></p>
-<p><img src="im-on-the-left.jpg"><a href="to-localized.album/cool-album" style="--primary-color: #123456; --dim-color: #000000">I&#39;m on the right.</a></p>
+<p><a href="to-localized.album/cool-album" style="--primary-color: #123456; --dim-color: #000000">I'm on the left.</a><img src="im-on-the-right.jpg"></p>
+<p><img src="im-on-the-left.jpg"><a href="to-localized.album/cool-album" style="--primary-color: #123456; --dim-color: #000000">I'm on the right.</a></p>
 <p>Media time! <img src="to-media.path/misc/interesting.png"> Oh yeah!</p>
 <p><img src="must.png"><img src="stick.png"><img src="together.png"></p>
 <p>And... all done! <img src="end-of-source.png"></p>
-
 `
 
 exports[`test/snapshot/transformContent.js TAP transformContent (snapshot) > links to a thing 1`] = `
 <p>This is <a href="to-localized.album/cool-album" style="--primary-color: #123456; --dim-color: #000000">my favorite album</a>.</p>
 <p>That&#39;s right, <a href="to-localized.album/cool-album" style="--primary-color: #123456; --dim-color: #000000">Cool Album</a>!</p>
-
 `
 
 exports[`test/snapshot/transformContent.js TAP transformContent (snapshot) > lyrics - basic line breaks 1`] = `
@@ -34,7 +31,6 @@ And away we go<br>
 Truly, music</p>
 <p>(Oh yeah)<br>
 (That&#39;s right)</p>
-
 `
 
 exports[`test/snapshot/transformContent.js TAP transformContent (snapshot) > lyrics - line breaks around tags 1`] = `
@@ -48,14 +44,12 @@ I say, the date be <time datetime="Tue, 13 Apr 2004 03:00:00 GMT">4/13/2004</tim
 <time datetime="Tue, 13 Apr 2004 03:00:00 GMT">4/13/2004</time><br></p>
 <p><time datetime="Tue, 13 Apr 2004 03:00:00 GMT">4/13/2004</time><br>
 <time datetime="Tue, 13 Apr 2004 03:00:00 GMT">4/13/2004</time>, and don&#39;t ye forget it</p>
-
 `
 
 exports[`test/snapshot/transformContent.js TAP transformContent (snapshot) > lyrics - repeated and edge line breaks 1`] = `
 <p>Well, you know<br>
 How it goes</p>
 <p>Yessiree</p>
-
 `
 
 exports[`test/snapshot/transformContent.js TAP transformContent (snapshot) > non-inline image #1 1`] = `
@@ -69,18 +63,14 @@ exports[`test/snapshot/transformContent.js TAP transformContent (snapshot) > non
 
 exports[`test/snapshot/transformContent.js TAP transformContent (snapshot) > non-inline image #3 1`] = `
 <div class="content-image"><a class="box image-link" href="spark.png"><div class="image-container"><div class="image-inner-area"><img src="spark.large.jpg"></div></div></a></div>
-
 <p>Baller.</p>
-
 `
 
 exports[`test/snapshot/transformContent.js TAP transformContent (snapshot) > super basic string 1`] = `
 <p>Neat listing: Albums - by Date</p>
-
 `
 
 exports[`test/snapshot/transformContent.js TAP transformContent (snapshot) > two text paragraphs 1`] = `
 <p>Hello, world!</p>
 <p>Wow, this is very cool.</p>
-
 `