diff options
author | (quasar) nebula <qznebula@protonmail.com> | 2025-05-01 14:49:30 -0300 |
---|---|---|
committer | (quasar) nebula <qznebula@protonmail.com> | 2025-05-06 12:29:05 -0300 |
commit | ac5e149c7220a473068536cd15e52fa0fbfaf2fe (patch) | |
tree | 77c08249455b5a69c02526e118e97aeb1321b01e | |
parent | 57dd7dbdafba63b6edbd330b98072f09957a5492 (diff) |
data: ContentEntry.annotationParts
Also fixes an error in splitting text around commas.
-rw-r--r-- | src/data/composite/things/content/index.js | 1 | ||||
-rw-r--r-- | src/data/composite/things/content/withAnnotationParts.js | 221 | ||||
-rw-r--r-- | src/data/composite/things/content/withSourceText.js | 166 | ||||
-rw-r--r-- | src/data/things/content.js | 11 |
4 files changed, 250 insertions, 149 deletions
diff --git a/src/data/composite/things/content/index.js b/src/data/composite/things/content/index.js index 1ee188c5..71133ce0 100644 --- a/src/data/composite/things/content/index.js +++ b/src/data/composite/things/content/index.js @@ -1,2 +1,3 @@ +export {default as withAnnotationParts} from './withAnnotationParts.js'; export {default as withSourceText} from './withSourceText.js'; export {default as withWebArchiveDate} from './withWebArchiveDate.js'; diff --git a/src/data/composite/things/content/withAnnotationParts.js b/src/data/composite/things/content/withAnnotationParts.js new file mode 100644 index 00000000..a7b2d1f0 --- /dev/null +++ b/src/data/composite/things/content/withAnnotationParts.js @@ -0,0 +1,221 @@ +import {input, templateCompositeFrom} from '#composite'; +import {parseInput} from '#replacer'; +import {transposeArrays} from '#sugar'; +import {is} from '#validators'; + +import {raiseOutputWithoutDependency} from '#composite/control-flow'; + +import { + withFilteredList, + withMappedList, + withPropertyFromList, + withUnflattenedList, +} from '#composite/data'; + +function* splitTextNodeAroundCommas(node) { + let textNode = { + i: node.i, + iEnd: null, + type: 'text', + data: '', + }; + + let parseFrom = 0; + for (const match of node.data.matchAll(/, */g)) { + const {index} = match, [{length}] = match; + + textNode.data += node.data.slice(parseFrom, index); + + if (textNode.data) { + textNode.iEnd = textNode.i + textNode.data.length; + yield textNode; + } + + yield { + i: node.i + index, + iEnd: node.i + index + length, + type: 'comma-separator', + }; + + textNode = { + i: node.i + index + length, + iEnd: null, + type: 'text', + data: '', + }; + + parseFrom = index + length; + } + + if (parseFrom !== node.data.length) { + textNode.data += node.data.slice(parseFrom); + textNode.iEnd = node.iEnd; + } + + if (textNode.data) { + yield textNode; + } +} + +function* splitTextNodesAroundCommas(nodes) { + for (const node of nodes) { + if (node.type === 'text' && node.data.includes(',')) { + yield* splitTextNodeAroundCommas(node); + } else { + yield node; + } + } +} + +export default templateCompositeFrom({ + annotation: `withAnnotationParts`, + + inputs: { + mode: input({ + validate: is('strings', 'nodes'), + }), + }, + + outputs: ['#annotationParts'], + + steps: () => [ + raiseOutputWithoutDependency({ + dependency: 'annotation', + output: input.value({'#annotationParts': []}), + }), + + // Get the list of nodes including custom comma-separator nodes. + + { + dependencies: ['annotation'], + compute: (continuation, { + ['annotation']: annotation, + }) => continuation({ + ['#nodes']: + Array.from( + splitTextNodesAroundCommas( + parseInput(annotation))), + }), + }, + + // Join the nodes into arrays for each range between comma separators, + // excluding the comma-separator nodes themselves. + + withMappedList({ + list: '#nodes', + map: input.value(node => node.type === 'comma-separator'), + }).outputs({ + '#mappedList': '#commaSeparatorFilter', + }), + + withMappedList({ + list: '#commaSeparatorFilter', + filter: '#commaSeparatorFilter', + map: input.value((_node, index) => index), + }), + + withFilteredList({ + list: '#mappedList', + filter: '#commaSeparatorFilter', + }).outputs({ + '#filteredList': '#commaSeparatorIndices', + }), + + { + dependencies: ['#nodes', '#commaSeparatorFilter'], + + compute: (continuation, { + ['#nodes']: nodes, + ['#commaSeparatorFilter']: commaSeparatorFilter, + }) => continuation({ + ['#nodes']: + nodes.map((node, index) => + (commaSeparatorFilter[index] + ? null + : node)), + }), + }, + + { + dependencies: ['#commaSeparatorIndices'], + compute: (continuation, { + ['#commaSeparatorIndices']: commaSeparatorIndices, + }) => continuation({ + ['#unflattenIndices']: + [0, ...commaSeparatorIndices], + }), + }, + + withUnflattenedList({ + list: '#nodes', + indices: '#unflattenIndices', + }).outputs({ + '#unflattenedList': '#nodeLists', + }), + + // Raise output now, if we're looking for node lists. + + { + dependencies: ['#nodeLists', input('mode')], + compute: (continuation, { + ['#nodeLists']: nodeLists, + [input('mode')]: mode, + }) => + (mode === 'nodes' + ? continuation.raiseOutput({'#annotationParts': nodeLists}) + : continuation()), + }, + + // Get the start of the first node, and the end of the last node, + // within each list. + + { + dependencies: ['#nodeLists'], + + compute: (continuation, { + ['#nodeLists']: nodeLists, + }) => continuation({ + ['#firstNodes']: + nodeLists.map(list => list.at(0)), + + ['#lastNodes']: + nodeLists.map(list => list.at(-1)), + }), + }, + + withPropertyFromList({ + list: '#firstNodes', + property: input.value('i'), + }).outputs({ + '#firstNodes.i': '#startIndices', + }), + + withPropertyFromList({ + list: '#lastNodes', + property: input.value('iEnd'), + }).outputs({ + '#lastNodes.iEnd': '#endIndices', + }), + + // Slice the content text within the bounds of each node list. + + { + dependencies: [ + 'annotation', + '#startIndices', + '#endIndices', + ], + + compute: (continuation, { + ['annotation']: annotation, + ['#startIndices']: startIndices, + ['#endIndices']: endIndices, + }) => continuation({ + ['#annotationParts']: + transposeArrays([startIndices, endIndices]) + .map(([start, end]) => + annotation.slice(start, end)), + }), + }, + ], +}); diff --git a/src/data/composite/things/content/withSourceText.js b/src/data/composite/things/content/withSourceText.js index 7f03f97d..487b3763 100644 --- a/src/data/composite/things/content/withSourceText.js +++ b/src/data/composite/things/content/withSourceText.js @@ -3,67 +3,7 @@ import {parseInput} from '#replacer'; import {raiseOutputWithoutDependency} from '#composite/control-flow'; -import { - withLengthOfList, - withMappedList, - withNearbyItemFromList, - withPropertyFromObject, -} from '#composite/data'; - -function* splitTextNodeAroundCommas(node) { - let textNode = { - i: node.i, - iEnd: null, - type: 'text', - data: '', - }; - - let parseFrom = 0; - for (const match of node.data.matchAll(/, */g)) { - const {index} = match, [{length}] = match; - - textNode.data += node.data.slice(parseFrom, index); - - if (textNode.data) { - textNode.iEnd = textNode.i + textNode.data.length; - yield textNode; - - textNode = { - i: node.i + index + length, - iEnd: null, - type: 'text', - data: '', - }; - } - - yield { - i: node.i + index, - iEnd: node.i + index + length, - type: 'comma-separator', - }; - - parseFrom = index + length; - } - - if (parseFrom !== node.data.length) { - textNode.data += node.data.slice(parseFrom); - textNode.iEnd = node.iEnd; - } - - if (textNode.data) { - yield textNode; - } -} - -function* splitTextNodesAroundCommas(nodes) { - for (const node of nodes) { - if (node.type === 'text' && node.data.includes(',')) { - yield* splitTextNodeAroundCommas(node); - } else { - yield node; - } - } -} +import withAnnotationParts from './withAnnotationParts.js'; export default templateCompositeFrom({ annotation: `withSourceText`, @@ -71,112 +11,42 @@ export default templateCompositeFrom({ outputs: ['#sourceText'], steps: () => [ - raiseOutputWithoutDependency({ - dependency: 'annotation', - output: input.value({'#sourceText': null}), + withAnnotationParts({ + mode: input.value('nodes'), }), - // Get the list of notes including custom comma-separator nodes, - // and do some basic processing to make details about this list - // available later. - - { - dependencies: ['annotation'], - compute: (continuation, { - ['annotation']: annotation, - }) => continuation({ - ['#nodes']: - Array.from( - splitTextNodesAroundCommas( - parseInput(annotation))), - }), - }, - - withLengthOfList({ - list: '#nodes', - }), - - withMappedList({ - list: '#nodes', - map: input.value(node => node.type === 'comma-separator'), - }).outputs({ - '#mappedList': '#commaSeparatorFilter', - }), - - // Identify the first and last nodes in the range running from - // the first external link, up til (not including) the following - // comma separator. - - { - dependencies: ['#nodes'], - compute: (continuation, { - ['#nodes']: nodes, - }) => continuation({ - ['#firstExternalLink']: - nodes.find(node => node.type === 'external-link'), - }), - }, - raiseOutputWithoutDependency({ - dependency: '#firstExternalLink', + dependency: '#annotationParts', output: input.value({'#sourceText': null}), }), - withNearbyItemFromList({ - item: '#firstExternalLink', - list: '#nodes', - offset: input.value(+1), - - filter: '#commaSeparatorFilter', - }).outputs({ - '#nearbyItem': '#nextCommaSeparator', - }), - { - dependencies: [ - '#firstExternalLink', - '#nextCommaSeparator', - '#nodes', - ], - + dependencies: ['#annotationParts'], compute: (continuation, { - ['#firstExternalLink']: firstExternalLink, - ['#nextCommaSeparator']: nextCommaSeparator, - ['#nodes']: nodes, + ['#annotationParts']: annotationParts, }) => continuation({ - ['#lastNodeInRange']: - (nextCommaSeparator - ? nodes.at(nodes.indexOf(nextCommaSeparator) - 1) - : nodes.at(-1)), + ['#firstPartWithExternalLink']: + annotationParts + .find(nodes => nodes + .some(node => node.type === 'external-link')), }), }, - // Extract the content text covered by that range. - - withPropertyFromObject({ - object: '#firstExternalLink', - property: input.value('i'), - }), - - withPropertyFromObject({ - object: '#lastNodeInRange', - property: input.value('iEnd'), + raiseOutputWithoutDependency({ + dependency: '#firstPartWithExternalLink', + output: input.value({'#sourceText': null}), }), { - dependencies: [ - '#firstExternalLink.i', - '#lastNodeInRange.iEnd', - 'annotation', - ], - + dependencies: ['annotation', '#firstPartWithExternalLink'], compute: (continuation, { - ['#firstExternalLink.i']: i, - ['#lastNodeInRange.iEnd']: iEnd, ['annotation']: annotation, + ['#firstPartWithExternalLink']: nodes, }) => continuation({ ['#sourceText']: - annotation.slice(i, iEnd), + annotation.slice( + nodes.at(0).i, + nodes.at(-1).iEnd), }), }, ], diff --git a/src/data/things/content.js b/src/data/things/content.js index d68fd5be..660d7020 100644 --- a/src/data/things/content.js +++ b/src/data/things/content.js @@ -15,7 +15,8 @@ import { withResultOfAvailabilityCheck, } from '#composite/control-flow'; -import {withWebArchiveDate, withSourceText} from '#composite/things/content'; +import {withAnnotationParts, withSourceText, withWebArchiveDate} + from '#composite/things/content'; export class ContentEntry extends Thing { static [Thing.getPropertyDescriptors] = ({Artist}) => ({ @@ -100,6 +101,14 @@ export class ContentEntry extends Thing { // Expose only + annotationParts: [ + withAnnotationParts({ + mode: input.value('strings'), + }), + + exposeDependency({dependency: '#annotationParts'}), + ], + sourceText: [ withSourceText(), exposeDependency({dependency: '#sourceText'}), |