diff options
author | (quasar) nebula <qznebula@protonmail.com> | 2023-08-16 15:22:34 -0300 |
---|---|---|
committer | (quasar) nebula <qznebula@protonmail.com> | 2023-08-16 15:22:34 -0300 |
commit | 9d120f85fc50dd16a3b47efde25e02f7e9cc6e79 (patch) | |
tree | da67e64d37343d850b282d2ac9589b0128363bdf | |
parent | 3a866a0b3d452d4aeb77d8a1e63ba8d5a2913296 (diff) |
content: transformContent: gracefully merge marked output & tag nodes
-rw-r--r-- | src/content/dependencies/transformContent.js | 124 | ||||
-rw-r--r-- | tap-snapshots/test/snapshot/transformContent.js.test.cjs | 14 |
2 files changed, 91 insertions, 47 deletions
diff --git a/src/content/dependencies/transformContent.js b/src/content/dependencies/transformContent.js index 68d7eaa4..3d93284d 100644 --- a/src/content/dependencies/transformContent.js +++ b/src/content/dependencies/transformContent.js @@ -346,6 +346,7 @@ export default { if (node.inline) { return { type: 'image', + inline: true, data: html.tag('img', {src, width, height}), }; @@ -355,6 +356,7 @@ export default { return { type: 'image', + inline: false, data: html.tag('div', {class: 'content-image'}, image.slots({ @@ -454,20 +456,84 @@ export default { mangle: false, }; + // The content of non-text nodes can end up getting mangled by marked. + // To avoid this, we replace them with mundane placeholders, then + // reinsert the content in the correct positions. This also avoids + // having to stringify tag content within this generate() function. + + const extractNonTextNodes = ({ + getTextNodeContents = node => node.data, + } = {}) => + contentFromNodes + .map((node, index) => { + if (node.type === 'text') { + return getTextNodeContents(node, index); + } + + const attributes = html.attributes({ + class: 'INSERT-NON-TEXT', + 'data-type': node.type, + }); + + if (node.type === 'image') { + attributes.set('data-inline', node.inline); + } + + return `<span ${attributes}>${index}</span>`; + }) + .join(''); + + const reinsertNonTextNodes = (markedOutput) => { + markedOutput = markedOutput.trim(); + + const tags = []; + const regexp = /<span class="INSERT-NON-TEXT" (.*?)>([0-9]+?)<\/span>/g; + + let deleteParagraph = false; + + const addText = (text) => { + if (deleteParagraph) { + text = text.replace(/^<\/p>/, ''); + deleteParagraph = false; + } + + tags.push(text); + }; + + let match = null, parseFrom = 0; + while (match = regexp.exec(markedOutput)) { + addText(markedOutput.slice(parseFrom, match.index)); + parseFrom = match.index + match[0].length; + + const attributes = html.parseAttributes(match[1]); + + // Images that were all on their own line need to be removed from + // the surrounding <p> tag that marked generates. The HTML parser + // treats a <div> that starts inside a <p> as a Crocker-class + // misgiving, and will treat you very badly if you feed it that. + if (attributes.get('data-type') === 'image') { + if (!attributes.get('data-inline')) { + tags[tags.length - 1] = tags[tags.length - 1].replace(/<p>$/, ''); + deleteParagraph = true; + } + } + + const nonTextNodeIndex = match[2]; + tags.push(contentFromNodes[nonTextNodeIndex].data); + } + + if (parseFrom !== markedOutput.length) { + addText(markedOutput.slice(parseFrom)); + } + + return html.tags(tags, {[html.joinChildren]: ''}); + }; + // This is separated into its own function just since we're gonna reuse // it in a minute if everything goes to heck in lyrics mode. const transformMultiline = () => { const markedInput = - contentFromNodes - .map(node => { - if (node.type === 'text') { - return node.data; - } else { - return node.data.toString(); - } - }) - .join('') - + extractNonTextNodes() // Compress multiple line breaks into single line breaks. .replace(/\n{2,}/g, '\n') // Expand line breaks which don't follow a list, quote, @@ -479,22 +545,12 @@ export default { .replace(/(?<=^>.*)\n+(?!^>)/gm, '\n\n'); const markedOutput = - marked.parse(markedInput, markedOptions) - // Images that were all on their own line need to be removed from - // the surrounding <p> tag that marked generates. The HTML parser - // treats a <div> that starts inside a <p> as a Crocker-class - // misgiving, and will treat you very badly if you feed it that. - .replace( - /^<p>(<a class="[^"]*?image-link.*?<\/a>)<\/p>$/gm, - (match, a) => a); - - return markedOutput; + marked.parse(markedInput, markedOptions); + + return reinsertNonTextNodes(markedOutput); } if (slots.mode === 'multiline') { - // Unfortunately, we kind of have to be super evil here and stringify - // the links, or else parse marked's output into html tags, which is - // very out of scope at the moment. return transformMultiline(); } @@ -514,15 +570,9 @@ export default { return transformMultiline(); } - // Lyrics mode is also evil for the same stringifying reasons as - // multiline. - return marked.parse( - contentFromNodes - .map((node, index) => { - if (node.type !== 'text') { - return node.data.toString(); - } - + const markedInput = + extractNonTextNodes({ + getTextNodeContents(node, index) { // First, replace line breaks that follow text content with // <br> tags. let content = node.data.replace(/(?!^)\n/gm, '<br>\n'); @@ -541,9 +591,13 @@ export default { } return content; - }) - .join(''), - markedOptions); + }, + }); + + const markedOutput = + marked.parse(markedInput, markedOptions); + + return reinsertNonTextNodes(markedOutput); } }, } diff --git a/tap-snapshots/test/snapshot/transformContent.js.test.cjs b/tap-snapshots/test/snapshot/transformContent.js.test.cjs index dd859b2f..d144cf12 100644 --- a/tap-snapshots/test/snapshot/transformContent.js.test.cjs +++ b/tap-snapshots/test/snapshot/transformContent.js.test.cjs @@ -8,24 +8,21 @@ exports[`test/snapshot/transformContent.js TAP transformContent (snapshot) > dates 1`] = ` <p><time datetime="Thu, 13 Apr 2023 00:00:00 GMT">4/12/2023</time> Yep!</p> <p>Very nice: <time datetime="Fri, 25 Oct 2413 03:00:00 GMT">10/25/2413</time></p> - ` exports[`test/snapshot/transformContent.js TAP transformContent (snapshot) > inline images 1`] = ` <p><img src="snooping.png"> as USUAL...</p> <p>What do you know? <img src="cowabunga.png" width="24" height="32"></p> -<p><a href="to-localized.album/cool-album" style="--primary-color: #123456; --dim-color: #000000">I'm on the left.</a><img src="im-on-the-right.jpg"></p> -<p><img src="im-on-the-left.jpg"><a href="to-localized.album/cool-album" style="--primary-color: #123456; --dim-color: #000000">I'm on the right.</a></p> +<p><a href="to-localized.album/cool-album" style="--primary-color: #123456; --dim-color: #000000">I'm on the left.</a><img src="im-on-the-right.jpg"></p> +<p><img src="im-on-the-left.jpg"><a href="to-localized.album/cool-album" style="--primary-color: #123456; --dim-color: #000000">I'm on the right.</a></p> <p>Media time! <img src="to-media.path/misc/interesting.png"> Oh yeah!</p> <p><img src="must.png"><img src="stick.png"><img src="together.png"></p> <p>And... all done! <img src="end-of-source.png"></p> - ` exports[`test/snapshot/transformContent.js TAP transformContent (snapshot) > links to a thing 1`] = ` <p>This is <a href="to-localized.album/cool-album" style="--primary-color: #123456; --dim-color: #000000">my favorite album</a>.</p> <p>That's right, <a href="to-localized.album/cool-album" style="--primary-color: #123456; --dim-color: #000000">Cool Album</a>!</p> - ` exports[`test/snapshot/transformContent.js TAP transformContent (snapshot) > lyrics - basic line breaks 1`] = ` @@ -34,7 +31,6 @@ And away we go<br> Truly, music</p> <p>(Oh yeah)<br> (That's right)</p> - ` exports[`test/snapshot/transformContent.js TAP transformContent (snapshot) > lyrics - line breaks around tags 1`] = ` @@ -48,14 +44,12 @@ I say, the date be <time datetime="Tue, 13 Apr 2004 03:00:00 GMT">4/13/2004</tim <time datetime="Tue, 13 Apr 2004 03:00:00 GMT">4/13/2004</time><br></p> <p><time datetime="Tue, 13 Apr 2004 03:00:00 GMT">4/13/2004</time><br> <time datetime="Tue, 13 Apr 2004 03:00:00 GMT">4/13/2004</time>, and don't ye forget it</p> - ` exports[`test/snapshot/transformContent.js TAP transformContent (snapshot) > lyrics - repeated and edge line breaks 1`] = ` <p>Well, you know<br> How it goes</p> <p>Yessiree</p> - ` exports[`test/snapshot/transformContent.js TAP transformContent (snapshot) > non-inline image #1 1`] = ` @@ -69,18 +63,14 @@ exports[`test/snapshot/transformContent.js TAP transformContent (snapshot) > non exports[`test/snapshot/transformContent.js TAP transformContent (snapshot) > non-inline image #3 1`] = ` <div class="content-image"><a class="box image-link" href="spark.png"><div class="image-container"><div class="image-inner-area"><img src="spark.large.jpg"></div></div></a></div> - <p>Baller.</p> - ` exports[`test/snapshot/transformContent.js TAP transformContent (snapshot) > super basic string 1`] = ` <p>Neat listing: Albums - by Date</p> - ` exports[`test/snapshot/transformContent.js TAP transformContent (snapshot) > two text paragraphs 1`] = ` <p>Hello, world!</p> <p>Wow, this is very cool.</p> - ` |