diff options
| author | (quasar) nebula <qznebula@protonmail.com> | 2025-10-21 17:48:08 -0300 | 
|---|---|---|
| committer | (quasar) nebula <qznebula@protonmail.com> | 2025-10-21 18:31:33 -0300 | 
| commit | e347d0b449028739b5ef88e0bd183f0a747f0e3a (patch) | |
| tree | e85dc950d1e53ae9d9b0370884ab84c7075a1ce2 /src/html.js | |
| parent | db1f23ae99a4ae45a95d5da40e44158c5b74d4dd (diff) | |
html: resolve(..., {normalize: 'plain'})
Diffstat (limited to 'src/html.js')
| -rw-r--r-- | src/html.js | 334 | 
1 files changed, 225 insertions, 109 deletions
| diff --git a/src/html.js b/src/html.js index 0a868ebd..444edd6a 100644 --- a/src/html.js +++ b/src/html.js @@ -2,6 +2,8 @@ import {inspect} from 'node:util'; +import striptags from 'striptags'; + import {withAggregate} from '#aggregate'; import {colors} from '#cli'; import {empty, typeAppearance, unique} from '#sugar'; @@ -39,6 +41,40 @@ export const selfClosingTags = [ 'wbr', ]; +// Every element under: +// https://html.spec.whatwg.org/multipage/text-level-semantics.html +export const textLevelSemanticTags = [ + 'a', + 'abbr', + 'b', + 'bdi', + 'bdo', + 'br', + 'cite', + 'code', + 'data', + 'dfn', + 'em', + 'i', + 'kbd', + 'mark', + 'q', + 'rp', + 'rt', + 'ruby', + 's', + 'samp', + 'small', + 'span', + 'strong', + 'sub', + 'sup', + 'time', + 'u', + 'var', + 'wbr', +]; + // Not so comprehensive!! export const attributeSpec = { 'class': { @@ -469,6 +505,7 @@ export class Tag { this.#content = contentArray; this.#content.toString = () => this.#stringifyContent(); + this.#content.toPlainText = () => this.#plainifyContent(); } get content() { @@ -677,6 +714,10 @@ export class Tag { : '\n')); } + toPlainText() { + return this.content.toPlainText(); + } + #getContentJoiner() { if (this.joinChildren === undefined) { return '\n'; @@ -696,11 +737,8 @@ export class Tag { const joiner = this.#getContentJoiner(); - let content = ''; let blockwrapClosers = ''; - let seenSiblingIndependentContent = false; - const chunkwrapSplitter = (this.chunkwrap ? this.#getAttributeRaw('split') @@ -711,110 +749,64 @@ export class Tag { ? 
false : null); - let contentItems; - - determineContentItems: { - if (this.chunkwrap) { - contentItems = smush(this).content; - break determineContentItems; - } - - contentItems = this.content; - } - - for (const [index, item] of contentItems.entries()) { - const nonTemplateItem = - Template.resolve(item); - - if (nonTemplateItem instanceof Tag && nonTemplateItem.imaginarySibling) { - seenSiblingIndependentContent = true; - continue; - } + const contentItems = + (this.chunkwrap + ? smush(this).content + : this.content); + + let content = this.#renderContentItems({ + from: '', + items: contentItems, + + getItemContent: item => item.toString(), + + appendItemContent(content, itemContent, item) { + const chunkwrapChunks = + (typeof item === 'string' && chunkwrapSplitter + ? Array.from(getChunkwrapChunks(itemContent, chunkwrapSplitter)) + : null); + + const itemIncludesChunkwrapSplit = + (chunkwrapChunks + ? chunkwrapChunks.length > 1 + : null); + + if (content) { + if (itemIncludesChunkwrapSplit && !seenChunkwrapSplitter) { + // The first time we see a chunkwrap splitter, backtrack and wrap + // the content *so far* in a chunk. This will be treated just like + // any other open chunkwrap, and closed after the first chunk of + // this item! (That means the existing content is part of the same + // chunk as the first chunk included in this content, which makes + // sense, because that first chink is really just more text that + // precedes the first split.) 
+ content = `<span class="chunkwrap">` + content; + } - let itemContent; - try { - itemContent = nonTemplateItem.toString(); - } catch (caughtError) { - const indexPart = colors.yellow(`child #${index + 1}`); - - const error = - new Error( - `Error in ${indexPart} ` + - `of ${inspect(this, {compact: true})}`, - {cause: caughtError}); - - if (this.#traceError && !disabledTagTracing) { - error[Symbol.for(`hsmusic.aggregate.alwaysTrace`)] = true; - error[Symbol.for(`hsmusic.aggregate.traceFrom`)] = this.#traceError; - - error[Symbol.for(`hsmusic.aggregate.unhelpfulTraceLines`)] = [ - /content-function\.js/, - /util\/html\.js/, - ]; - - error[Symbol.for(`hsmusic.aggregate.helpfulTraceLines`)] = [ - /content\/dependencies\/(.*\.js:.*(?=\)))/, - ]; + content += joiner; + } else if (itemIncludesChunkwrapSplit) { + // We've encountered a chunkwrap split before any other content. + // This means there's no content to wrap, no existing chunkwrap + // to close, and no reason to add a joiner, but we *do* need to + // enter a chunkwrap wrapper *now*, so the first chunk of this + // item will be properly wrapped. + content = `<span class="chunkwrap">`; } - throw error; - } - - if (!itemContent) { - continue; - } - - if (!(nonTemplateItem instanceof Tag) || !nonTemplateItem.onlyIfSiblings) { - seenSiblingIndependentContent = true; - } - - const chunkwrapChunks = - (typeof nonTemplateItem === 'string' && chunkwrapSplitter - ? Array.from(getChunkwrapChunks(itemContent, chunkwrapSplitter)) - : null); - - const itemIncludesChunkwrapSplit = - (chunkwrapChunks - ? chunkwrapChunks.length > 1 - : null); - - if (content) { - if (itemIncludesChunkwrapSplit && !seenChunkwrapSplitter) { - // The first time we see a chunkwrap splitter, backtrack and wrap - // the content *so far* in a chunk. This will be treated just like - // any other open chunkwrap, and closed after the first chunk of - // this item! 
(That means the existing content is part of the same - // chunk as the first chunk included in this content, which makes - // sense, because that first chink is really just more text that - // precedes the first split.) - content = `<span class="chunkwrap">` + content; + if (itemIncludesChunkwrapSplit) { + seenChunkwrapSplitter = true; } - content += joiner; - } else if (itemIncludesChunkwrapSplit) { - // We've encountered a chunkwrap split before any other content. - // This means there's no content to wrap, no existing chunkwrap - // to close, and no reason to add a joiner, but we *do* need to - // enter a chunkwrap wrapper *now*, so the first chunk of this - // item will be properly wrapped. - content = `<span class="chunkwrap">`; - } - - if (itemIncludesChunkwrapSplit) { - seenChunkwrapSplitter = true; - } - - // Blockwraps only apply if they actually contain some content whose - // words should be kept together, so it's okay to put them beneath the - // itemContent check. They also never apply at the very start of content, - // because at that point there aren't any preceding words from which the - // blockwrap would differentiate its content. - if (nonTemplateItem instanceof Tag && nonTemplateItem.blockwrap && content) { - content += `<span class="blockwrap">`; - blockwrapClosers += `</span>`; - } + // Blockwraps only apply if they actually contain some content whose + // words should be kept together, so it's okay to put them beneath the + // itemContent check. They also never apply at the very start of content, + // because at that point there aren't any preceding words from which the + // blockwrap would differentiate its content. 
+ if (item instanceof Tag && item.blockwrap && content) { + content += `<span class="blockwrap">`; + blockwrapClosers += `</span>`; + } - appendItemContent: { if (itemIncludesChunkwrapSplit) { for (const [index, {chunk, following}] of chunkwrapChunks.entries()) { if (index === 0) { @@ -848,17 +840,15 @@ export class Tag { } } - break appendItemContent; + return content; } - content += itemContent; - } - } + return content += itemContent; + }, + }); - // If we've only seen sibling-dependent content (or just no content), - // then the content in total is blank. - if (!seenSiblingIndependentContent) { - return ''; + if (!content.length) { + return content; } if (chunkwrapSplitter) { @@ -878,6 +868,130 @@ export class Tag { return content; } + #plainifyContent() { + // Doesn't play too nice with transformContent, because that function, + // working with the Marked library to process markdown, returns a mix of + // raw HTML strings and actual tags - this function only makes nice line + // breaks out of actual tags. + + if (this.selfClosing) { + return ''; + } + + let joiner = this.#getContentJoiner(); + + if (joiner instanceof Tag && joiner.tagName === 'br') { + joiner = '\n'; + } + + if (joiner === '\n') { + joiner = ' '; + } + + let content = this.#renderContentItems({ + from: '', + items: this.content, + + getItemContent: item => + (item instanceof Tag + ? 
item.toPlainText() + : item.toString()), + + appendItemContent(content, itemContent, item) { + if (joiner === ' ') { + if (item instanceof Tag && !textLevelSemanticTags.includes(item.tagName)) { + content += '\n\n'; + } else if (!content.endsWith(' ')) { + content += ' '; + } + } else { + content += joiner; + } + + return content += itemContent; + }, + }); + + content = + striptags(content) + .replaceAll('&#39;', `'`) + .replaceAll('&quot;', `"`); + + return content; + } + + #renderContentItems(config) { + let content = structuredClone(config.from); + + let seenSiblingIndependentContent = false; + + for (const [index, item] of config.items.entries()) { + const nonTemplateItem = Template.resolve(item); + + if (nonTemplateItem instanceof Tag && nonTemplateItem.imaginarySibling) { + seenSiblingIndependentContent = true; + continue; + } + + let itemContent; + try { + itemContent = config.getItemContent(nonTemplateItem); + } catch (caughtError) { + throw this.#annotateContentItemError(caughtError, index); + } + + if (!itemContent) { + continue; + } + + const previousLength = content.length; + + content = config.appendItemContent(content, itemContent, nonTemplateItem); + + if (content.length === previousLength) { + continue; + } + + if (!(nonTemplateItem instanceof Tag) || !nonTemplateItem.onlyIfSiblings) { + seenSiblingIndependentContent = true; + } + } + + // If we've only seen sibling-dependent content (or just no content), + // then the content in total is blank. 
+ if (!seenSiblingIndependentContent) { + return config.from; + } + + return content; + } + + #annotateContentItemError(caughtError, index) { + const indexPart = colors.yellow(`child #${index + 1}`); + + const error = + new Error( + `Error in ${indexPart} ` + + `of ${inspect(this, {compact: true})}`, + {cause: caughtError}); + + if (this.#traceError && !disabledTagTracing) { + error[Symbol.for(`hsmusic.aggregate.alwaysTrace`)] = true; + error[Symbol.for(`hsmusic.aggregate.traceFrom`)] = this.#traceError; + + error[Symbol.for(`hsmusic.aggregate.unhelpfulTraceLines`)] = [ + /content-function\.js/, + /util\/html\.js/, + ]; + + error[Symbol.for(`hsmusic.aggregate.helpfulTraceLines`)] = [ + /content\/dependencies\/(.*\.js:.*(?=\)))/, + ]; + } + + return error; + } + static normalize(content) { // Normalizes contents that are valid from an `isHTML` perspective so // that it's always a pure, single Tag object. @@ -1534,6 +1648,8 @@ export function resolve(tagOrTemplate, { return Tag.normalize(tagOrTemplate); } else if (normalize === 'string') { return Tag.normalize(tagOrTemplate).toString(); + } else if (normalize === 'plain') { + return Tag.normalize(tagOrTemplate).toPlainText(); } else if (normalize) { throw new TypeError(`Expected normalize to be 'tag', 'string', or null`); } else { | 
