« get me outta code hell

html: resolve(..., {normalize: 'plain'}) - hsmusic-wiki - HSMusic - static wiki software cataloguing collaborative creation
about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author (quasar) nebula <qznebula@protonmail.com> 2025-10-21 17:48:08 -0300
committer (quasar) nebula <qznebula@protonmail.com> 2025-10-21 18:31:33 -0300
commit e347d0b449028739b5ef88e0bd183f0a747f0e3a (patch)
tree e85dc950d1e53ae9d9b0370884ab84c7075a1ce2 /src
parent db1f23ae99a4ae45a95d5da40e44158c5b74d4dd (diff)
html: resolve(..., {normalize: 'plain'})
Diffstat (limited to 'src')
-rw-r--r-- src/html.js 334
1 files changed, 225 insertions, 109 deletions
diff --git a/src/html.js b/src/html.js
index 0a868ebd..444edd6a 100644
--- a/src/html.js
+++ b/src/html.js
@@ -2,6 +2,8 @@
 
 import {inspect} from 'node:util';
 
+import striptags from 'striptags';
+
 import {withAggregate} from '#aggregate';
 import {colors} from '#cli';
 import {empty, typeAppearance, unique} from '#sugar';
@@ -39,6 +41,40 @@ export const selfClosingTags = [
   'wbr',
 ];
 
+// Every element under:
+// https://html.spec.whatwg.org/multipage/text-level-semantics.html
+export const textLevelSemanticTags = [
+  'a',
+  'abbr',
+  'b',
+  'bdi',
+  'bdo',
+  'br',
+  'cite',
+  'code',
+  'data',
+  'dfn',
+  'em',
+  'i',
+  'kbd',
+  'mark',
+  'q',
+  'rp',
+  'rt',
+  'ruby',
+  's',
+  'samp',
+  'small',
+  'span',
+  'strong',
+  'sub',
+  'sup',
+  'time',
+  'u',
+  'var',
+  'wbr',
+];
+
 // Not so comprehensive!!
 export const attributeSpec = {
   'class': {
@@ -469,6 +505,7 @@ export class Tag {
 
     this.#content = contentArray;
     this.#content.toString = () => this.#stringifyContent();
+    this.#content.toPlainText = () => this.#plainifyContent();
   }
 
   get content() {
@@ -677,6 +714,10 @@ export class Tag {
         : '\n'));
   }
 
+  toPlainText() {
+    return this.content.toPlainText();
+  }
+
   #getContentJoiner() {
     if (this.joinChildren === undefined) {
       return '\n';
@@ -696,11 +737,8 @@ export class Tag {
 
     const joiner = this.#getContentJoiner();
 
-    let content = '';
     let blockwrapClosers = '';
 
-    let seenSiblingIndependentContent = false;
-
     const chunkwrapSplitter =
       (this.chunkwrap
         ? this.#getAttributeRaw('split')
@@ -711,110 +749,64 @@ export class Tag {
         ? false
         : null);
 
-    let contentItems;
-
-    determineContentItems: {
-      if (this.chunkwrap) {
-        contentItems = smush(this).content;
-        break determineContentItems;
-      }
-
-      contentItems = this.content;
-    }
-
-    for (const [index, item] of contentItems.entries()) {
-      const nonTemplateItem =
-        Template.resolve(item);
-
-      if (nonTemplateItem instanceof Tag && nonTemplateItem.imaginarySibling) {
-        seenSiblingIndependentContent = true;
-        continue;
-      }
+    const contentItems =
+      (this.chunkwrap
+        ? smush(this).content
+        : this.content);
+
+    let content = this.#renderContentItems({
+      from: '',
+      items: contentItems,
+
+      getItemContent: item => item.toString(),
+
+      appendItemContent(content, itemContent, item) {
+        const chunkwrapChunks =
+          (typeof item === 'string' && chunkwrapSplitter
+            ? Array.from(getChunkwrapChunks(itemContent, chunkwrapSplitter))
+            : null);
+
+        const itemIncludesChunkwrapSplit =
+          (chunkwrapChunks
+            ? chunkwrapChunks.length > 1
+            : null);
+
+        if (content) {
+          if (itemIncludesChunkwrapSplit && !seenChunkwrapSplitter) {
+            // The first time we see a chunkwrap splitter, backtrack and wrap
+            // the content *so far* in a chunk. This will be treated just like
+            // any other open chunkwrap, and closed after the first chunk of
+            // this item! (That means the existing content is part of the same
+            // chunk as the first chunk included in this content, which makes
+            // sense, because that first chunk is really just more text that
+            // precedes the first split.)
+            content = `<span class="chunkwrap">` + content;
+          }
 
-      let itemContent;
-      try {
-        itemContent = nonTemplateItem.toString();
-      } catch (caughtError) {
-        const indexPart = colors.yellow(`child #${index + 1}`);
-
-        const error =
-          new Error(
-            `Error in ${indexPart} ` +
-            `of ${inspect(this, {compact: true})}`,
-            {cause: caughtError});
-
-        if (this.#traceError && !disabledTagTracing) {
-          error[Symbol.for(`hsmusic.aggregate.alwaysTrace`)] = true;
-          error[Symbol.for(`hsmusic.aggregate.traceFrom`)] = this.#traceError;
-
-          error[Symbol.for(`hsmusic.aggregate.unhelpfulTraceLines`)] = [
-            /content-function\.js/,
-            /util\/html\.js/,
-          ];
-
-          error[Symbol.for(`hsmusic.aggregate.helpfulTraceLines`)] = [
-            /content\/dependencies\/(.*\.js:.*(?=\)))/,
-          ];
+          content += joiner;
+        } else if (itemIncludesChunkwrapSplit) {
+          // We've encountered a chunkwrap split before any other content.
+          // This means there's no content to wrap, no existing chunkwrap
+          // to close, and no reason to add a joiner, but we *do* need to
+          // enter a chunkwrap wrapper *now*, so the first chunk of this
+          // item will be properly wrapped.
+          content = `<span class="chunkwrap">`;
         }
 
-        throw error;
-      }
-
-      if (!itemContent) {
-        continue;
-      }
-
-      if (!(nonTemplateItem instanceof Tag) || !nonTemplateItem.onlyIfSiblings) {
-        seenSiblingIndependentContent = true;
-      }
-
-      const chunkwrapChunks =
-        (typeof nonTemplateItem === 'string' && chunkwrapSplitter
-          ? Array.from(getChunkwrapChunks(itemContent, chunkwrapSplitter))
-          : null);
-
-      const itemIncludesChunkwrapSplit =
-        (chunkwrapChunks
-          ? chunkwrapChunks.length > 1
-          : null);
-
-      if (content) {
-        if (itemIncludesChunkwrapSplit && !seenChunkwrapSplitter) {
-          // The first time we see a chunkwrap splitter, backtrack and wrap
-          // the content *so far* in a chunk. This will be treated just like
-          // any other open chunkwrap, and closed after the first chunk of
-          // this item! (That means the existing content is part of the same
-          // chunk as the first chunk included in this content, which makes
-          // sense, because that first chink is really just more text that
-          // precedes the first split.)
-          content = `<span class="chunkwrap">` + content;
+        if (itemIncludesChunkwrapSplit) {
+          seenChunkwrapSplitter = true;
         }
 
-        content += joiner;
-      } else if (itemIncludesChunkwrapSplit) {
-        // We've encountered a chunkwrap split before any other content.
-        // This means there's no content to wrap, no existing chunkwrap
-        // to close, and no reason to add a joiner, but we *do* need to
-        // enter a chunkwrap wrapper *now*, so the first chunk of this
-        // item will be properly wrapped.
-        content = `<span class="chunkwrap">`;
-      }
-
-      if (itemIncludesChunkwrapSplit) {
-        seenChunkwrapSplitter = true;
-      }
-
-      // Blockwraps only apply if they actually contain some content whose
-      // words should be kept together, so it's okay to put them beneath the
-      // itemContent check. They also never apply at the very start of content,
-      // because at that point there aren't any preceding words from which the
-      // blockwrap would differentiate its content.
-      if (nonTemplateItem instanceof Tag && nonTemplateItem.blockwrap && content) {
-        content += `<span class="blockwrap">`;
-        blockwrapClosers += `</span>`;
-      }
+        // Blockwraps only apply if they actually contain some content whose
+        // words should be kept together, so it's okay to put them beneath the
+        // itemContent check. They also never apply at the very start of content,
+        // because at that point there aren't any preceding words from which the
+        // blockwrap would differentiate its content.
+        if (item instanceof Tag && item.blockwrap && content) {
+          content += `<span class="blockwrap">`;
+          blockwrapClosers += `</span>`;
+        }
 
-      appendItemContent: {
         if (itemIncludesChunkwrapSplit) {
           for (const [index, {chunk, following}] of chunkwrapChunks.entries()) {
             if (index === 0) {
@@ -848,17 +840,15 @@ export class Tag {
             }
           }
 
-          break appendItemContent;
+          return content;
         }
 
-        content += itemContent;
-      }
-    }
+        return content += itemContent;
+      },
+    });
 
-    // If we've only seen sibling-dependent content (or just no content),
-    // then the content in total is blank.
-    if (!seenSiblingIndependentContent) {
-      return '';
+    if (!content.length) {
+      return content;
     }
 
     if (chunkwrapSplitter) {
@@ -878,6 +868,130 @@ export class Tag {
     return content;
   }
 
+  #plainifyContent() {
+    // Doesn't play too nice with transformContent, because that function,
+    // working with the Marked library to process markdown, returns a mix of
+    // raw HTML strings and actual tags - this function only makes nice line
+    // breaks out of actual tags.
+
+    if (this.selfClosing) {
+      return '';
+    }
+
+    let joiner = this.#getContentJoiner();
+
+    if (joiner instanceof Tag && joiner.tagName === 'br') {
+      joiner = '\n';
+    }
+
+    if (joiner === '\n') {
+      joiner = ' ';
+    }
+
+    let content = this.#renderContentItems({
+      from: '',
+      items: this.content,
+
+      getItemContent: item =>
+        (item instanceof Tag
+          ? item.toPlainText()
+          : item.toString()),
+
+      appendItemContent(content, itemContent, item) {
+        if (joiner === ' ') {
+          if (item instanceof Tag && !textLevelSemanticTags.includes(item.tagName)) {
+            content += '\n\n';
+          } else if (!content.endsWith(' ')) {
+            content += ' ';
+          }
+        } else {
+          content += joiner;
+        }
+
+        return content += itemContent;
+      },
+    });
+
+    content =
+      striptags(content)
+        .replaceAll('&#39;', `'`)
+        .replaceAll('&quot;', `"`);
+
+    return content;
+  }
+
+  #renderContentItems(config) {
+    let content = structuredClone(config.from);
+
+    let seenSiblingIndependentContent = false;
+
+    for (const [index, item] of config.items.entries()) {
+      const nonTemplateItem = Template.resolve(item);
+
+      if (nonTemplateItem instanceof Tag && nonTemplateItem.imaginarySibling) {
+        seenSiblingIndependentContent = true;
+        continue;
+      }
+
+      let itemContent;
+      try {
+        itemContent = config.getItemContent(nonTemplateItem);
+      } catch (caughtError) {
+        throw this.#annotateContentItemError(caughtError, index);
+      }
+
+      if (!itemContent) {
+        continue;
+      }
+
+      const previousLength = content.length;
+
+      content = config.appendItemContent(content, itemContent, nonTemplateItem);
+
+      if (content.length === previousLength) {
+        continue;
+      }
+
+      if (!(nonTemplateItem instanceof Tag) || !nonTemplateItem.onlyIfSiblings) {
+        seenSiblingIndependentContent = true;
+      }
+    }
+
+    // If we've only seen sibling-dependent content (or just no content),
+    // then the content in total is blank.
+    if (!seenSiblingIndependentContent) {
+      return config.from;
+    }
+
+    return content;
+  }
+
+  #annotateContentItemError(caughtError, index) {
+    const indexPart = colors.yellow(`child #${index + 1}`);
+
+    const error =
+      new Error(
+        `Error in ${indexPart} ` +
+        `of ${inspect(this, {compact: true})}`,
+        {cause: caughtError});
+
+    if (this.#traceError && !disabledTagTracing) {
+      error[Symbol.for(`hsmusic.aggregate.alwaysTrace`)] = true;
+      error[Symbol.for(`hsmusic.aggregate.traceFrom`)] = this.#traceError;
+
+      error[Symbol.for(`hsmusic.aggregate.unhelpfulTraceLines`)] = [
+        /content-function\.js/,
+        /util\/html\.js/,
+      ];
+
+      error[Symbol.for(`hsmusic.aggregate.helpfulTraceLines`)] = [
+        /content\/dependencies\/(.*\.js:.*(?=\)))/,
+      ];
+    }
+
+    return error;
+  }
+
   static normalize(content) {
     // Normalizes contents that are valid from an `isHTML` perspective so
     // that it's always a pure, single Tag object.
@@ -1534,6 +1648,8 @@ export function resolve(tagOrTemplate, {
     return Tag.normalize(tagOrTemplate);
   } else if (normalize === 'string') {
     return Tag.normalize(tagOrTemplate).toString();
+  } else if (normalize === 'plain') {
+    return Tag.normalize(tagOrTemplate).toPlainText();
   } else if (normalize) {
     throw new TypeError(`Expected normalize to be 'tag', 'string', or null`);
   } else {