// src/data/composite/things/content/withAnnotationParts.js
//
// Splits a content entry's annotation into its comma-separated parts,
// either as lists of parsed nodes or as slices of the annotation string.
// (Introduced by the commit "data: ContentEntry.annotationParts".)

import {input, templateCompositeFrom} from '#composite';
import {parseInput} from '#replacer';
import {transposeArrays} from '#sugar';
import {is} from '#validators';

import {raiseOutputWithoutDependency} from '#composite/control-flow';

import {
  withFilteredList,
  withMappedList,
  withPropertyFromList,
  withUnflattenedList,
} from '#composite/data';

/**
 * Breaks one text node into alternating text and comma-separator nodes.
 *
 * A separator is a comma plus any spaces directly following it. Each yielded
 * node carries `i` / `iEnd` offsets derived from the source node's own `i`,
 * so the pieces line up with the positions of the original node. Empty text
 * pieces (before a leading comma, between adjacent commas, after a trailing
 * comma) are never yielded.
 *
 * @param {{i: number, iEnd: number, type: string, data: string}} node
 *   The text node to split.
 * @yields {object} `text` nodes (`i`, `iEnd`, `type`, `data`) and
 *   `comma-separator` nodes (`i`, `iEnd`, `type`), in source order.
 */
function* splitTextNodeAroundCommas(node) {
  const text = node.data;
  const base = node.i;

  // Offset (within `text`) of the first character not yet emitted.
  let cursor = 0;

  for (const match of text.matchAll(/, */g)) {
    const commaStart = match.index;
    const commaEnd = commaStart + match[0].length;

    const before = text.slice(cursor, commaStart);

    if (before) {
      yield {
        i: base + cursor,
        iEnd: base + cursor + before.length,
        type: 'text',
        data: before,
      };
    }

    yield {
      i: base + commaStart,
      iEnd: base + commaEnd,
      type: 'comma-separator',
    };

    cursor = commaEnd;
  }

  const remainder = text.slice(cursor);

  // The trailing piece ends wherever the source node itself ends.
  if (remainder) {
    yield {
      i: base + cursor,
      iEnd: node.iEnd,
      type: 'text',
      data: remainder,
    };
  }
}

/**
 * Walks a list of nodes, passing most through untouched and expanding any
 * text node whose data contains a comma via splitTextNodeAroundCommas.
 *
 * @param {Iterable<object>} nodes - Nodes to process.
 * @yields {object} The original nodes, with comma-containing text nodes
 *   replaced by their text / comma-separator pieces.
 */
function* splitTextNodesAroundCommas(nodes) {
  for (const node of nodes) {
    const needsSplit =
      node.type === 'text' &&
      node.data.includes(',');

    if (!needsSplit) {
      yield node;
      continue;
    }

    yield* splitTextNodeAroundCommas(node);
  }
}

// Input `mode` selects the shape of the output:
// - 'nodes': each part is the list of nodes between comma separators.
// - 'strings': each part is the slice of the annotation string spanning
//   from the first node's start to the last node's end in that list.
export default templateCompositeFrom({
  annotation: `withAnnotationParts`,

  inputs: {
    mode: input({
      validate: is('strings', 'nodes'),
    }),
  },

  outputs: ['#annotationParts'],

  steps: () => [
    // Presumably: with no annotation available, output an empty list of
    // parts and stop here.

    raiseOutputWithoutDependency({
      dependency: 'annotation',
      output: input.value({'#annotationParts': []}),
    }),

    // Parse the annotation, then split text nodes so that every comma
    // becomes its own comma-separator node.

    {
      dependencies: ['annotation'],
      compute: (continuation, {annotation}) => {
        const parsedNodes = parseInput(annotation);
        const splitNodes = splitTextNodesAroundCommas(parsedNodes);

        return continuation({
          '#nodes': Array.from(splitNodes),
        });
      },
    },

    // Group the nodes into one array per range between comma separators,
    // leaving out the comma-separator nodes themselves. First, work out
    // which nodes are separators and at which indices they sit.

    withMappedList({
      list: '#nodes',
      map: input.value(node => node.type === 'comma-separator'),
    }).outputs({
      '#mappedList': '#commaSeparatorFilter',
    }),

    withMappedList({
      list: '#commaSeparatorFilter',
      filter: '#commaSeparatorFilter',
      map: input.value((_node, index) => index),
    }),

    withFilteredList({
      list: '#mappedList',
      filter: '#commaSeparatorFilter',
    }).outputs({
      '#filteredList': '#commaSeparatorIndices',
    }),

    // Blank out the separators in place (keeping every other node at its
    // original index).
    // NOTE(review): presumably withUnflattenedList drops these null
    // entries when building each group - confirm against its definition.

    {
      dependencies: ['#nodes', '#commaSeparatorFilter'],

      compute: (continuation, {
        '#nodes': allNodes,
        '#commaSeparatorFilter': isSeparator,
      }) => continuation({
        '#nodes':
          allNodes.map((node, index) =>
            (isSeparator[index]
              ? null
              : node)),
      }),
    },

    // Each group starts at index zero or at a separator's index.

    {
      dependencies: ['#commaSeparatorIndices'],
      compute: (continuation, {
        '#commaSeparatorIndices': separatorIndices,
      }) => continuation({
        '#unflattenIndices': [0, ...separatorIndices],
      }),
    },

    withUnflattenedList({
      list: '#nodes',
      indices: '#unflattenIndices',
    }).outputs({
      '#unflattenedList': '#nodeLists',
    }),

    // In 'nodes' mode the node lists are the final output, so raise them
    // now; otherwise carry on to produce strings.

    {
      dependencies: ['#nodeLists', input('mode')],
      compute: (continuation, {
        '#nodeLists': nodeLists,
        [input('mode')]: mode,
      }) => {
        if (mode === 'nodes') {
          return continuation.raiseOutput({'#annotationParts': nodeLists});
        }

        return continuation();
      },
    },

    // For each list, take the start offset of its first node and the end
    // offset of its last node.

    {
      dependencies: ['#nodeLists'],

      compute: (continuation, {
        '#nodeLists': nodeLists,
      }) => {
        const firstNodes = nodeLists.map(list => list.at(0));
        const lastNodes = nodeLists.map(list => list.at(-1));

        return continuation({
          '#firstNodes': firstNodes,
          '#lastNodes': lastNodes,
        });
      },
    },

    withPropertyFromList({
      list: '#firstNodes',
      property: input.value('i'),
    }).outputs({
      '#firstNodes.i': '#startIndices',
    }),

    withPropertyFromList({
      list: '#lastNodes',
      property: input.value('iEnd'),
    }).outputs({
      '#lastNodes.iEnd': '#endIndices',
    }),

    // Cut the annotation string at each (start, end) pair of offsets.

    {
      dependencies: [
        'annotation',
        '#startIndices',
        '#endIndices',
      ],

      compute: (continuation, {
        'annotation': annotation,
        '#startIndices': startIndices,
        '#endIndices': endIndices,
      }) => {
        const bounds = transposeArrays([startIndices, endIndices]);

        return continuation({
          '#annotationParts':
            bounds.map(([start, end]) =>
              annotation.slice(start, end)),
        });
      },
    },
  ],
});