diff options
author | (quasar) nebula <qznebula@protonmail.com> | 2025-05-01 14:01:01 -0300 |
---|---|---|
committer | (quasar) nebula <qznebula@protonmail.com> | 2025-05-06 12:29:05 -0300 |
commit | 57dd7dbdafba63b6edbd330b98072f09957a5492 (patch) | |
tree | 529779314f34e53ba116422fe68a1fa97c5e9cc8 /src | |
parent | 9f14f1dfc7aa6c00c0cfa07577208ad1bdcc62f7 (diff) |
data: withSourceText: parse nodes
The node-splitting behavior is itself nearly identical to what postprocessExternalLinks still does, so it would be nice to factor that out, but we haven't done so yet. Some degree of "parse stuff out of a comma-divided bunch of source text" is probably worth factoring out too, later.
Diffstat (limited to 'src')
-rw-r--r-- | src/data/composite/things/content/withSourceText.js | 171 |
1 file changed, 139 insertions, 32 deletions
diff --git a/src/data/composite/things/content/withSourceText.js b/src/data/composite/things/content/withSourceText.js index cfab64a8..7f03f97d 100644 --- a/src/data/composite/things/content/withSourceText.js +++ b/src/data/composite/things/content/withSourceText.js @@ -1,10 +1,70 @@ -import * as marked from 'marked'; - import {input, templateCompositeFrom} from '#composite'; -import {matchMarkdownLinks} from '#wiki-data'; +import {parseInput} from '#replacer'; import {raiseOutputWithoutDependency} from '#composite/control-flow'; +import { + withLengthOfList, + withMappedList, + withNearbyItemFromList, + withPropertyFromObject, +} from '#composite/data'; + +function* splitTextNodeAroundCommas(node) { + let textNode = { + i: node.i, + iEnd: null, + type: 'text', + data: '', + }; + + let parseFrom = 0; + for (const match of node.data.matchAll(/, */g)) { + const {index} = match, [{length}] = match; + + textNode.data += node.data.slice(parseFrom, index); + + if (textNode.data) { + textNode.iEnd = textNode.i + textNode.data.length; + yield textNode; + + textNode = { + i: node.i + index + length, + iEnd: null, + type: 'text', + data: '', + }; + } + + yield { + i: node.i + index, + iEnd: node.i + index + length, + type: 'comma-separator', + }; + + parseFrom = index + length; + } + + if (parseFrom !== node.data.length) { + textNode.data += node.data.slice(parseFrom); + textNode.iEnd = node.iEnd; + } + + if (textNode.data) { + yield textNode; + } +} + +function* splitTextNodesAroundCommas(nodes) { + for (const node of nodes) { + if (node.type === 'text' && node.data.includes(',')) { + yield* splitTextNodeAroundCommas(node); + } else { + yield node; + } + } +} + export default templateCompositeFrom({ annotation: `withSourceText`, @@ -16,60 +76,107 @@ export default templateCompositeFrom({ output: input.value({'#sourceText': null}), }), + // Get the list of nodes including custom comma-separator nodes, + // and do some basic processing to make details about this list + // 
available later. + { dependencies: ['annotation'], compute: (continuation, { ['annotation']: annotation, }) => continuation({ - ['#matches']: - Array.from(matchMarkdownLinks(annotation, {marked})), + ['#nodes']: + Array.from( + splitTextNodesAroundCommas( + parseInput(annotation))), }), }, - raiseOutputWithoutDependency({ - dependency: '#matches', - output: input.value({'#sourceText': null}), - mode: input.value('empty'), + withLengthOfList({ + list: '#nodes', + }), + + withMappedList({ + list: '#nodes', + map: input.value(node => node.type === 'comma-separator'), + }).outputs({ + '#mappedList': '#commaSeparatorFilter', }), + // Identify the first and last nodes in the range running from + // the first external link, up til (not including) the following + // comma separator. + { - dependencies: ['#matches'], + dependencies: ['#nodes'], compute: (continuation, { - ['#matches']: matches, - }) => - continuation({ - ['#startIndex']: - matches.at(0).index, - - ['#endIndex']: - matches.at(-1).index + - matches.at(-1).length, - }), + ['#nodes']: nodes, + }) => continuation({ + ['#firstExternalLink']: + nodes.find(node => node.type === 'external-link'), + }), }, + raiseOutputWithoutDependency({ + dependency: '#firstExternalLink', + output: input.value({'#sourceText': null}), + }), + + withNearbyItemFromList({ + item: '#firstExternalLink', + list: '#nodes', + offset: input.value(+1), + + filter: '#commaSeparatorFilter', + }).outputs({ + '#nearbyItem': '#nextCommaSeparator', + }), + { - dependencies: ['annotation', '#endIndex'], + dependencies: [ + '#firstExternalLink', + '#nextCommaSeparator', + '#nodes', + ], + compute: (continuation, { - ['annotation']: annotation, - ['#endIndex']: endIndex, + ['#firstExternalLink']: firstExternalLink, + ['#nextCommaSeparator']: nextCommaSeparator, + ['#nodes']: nodes, }) => continuation({ - ['#rest']: - annotation.slice(endIndex) - .match(/^[^,]*(?=,|$)/), + ['#lastNodeInRange']: + (nextCommaSeparator + ? 
nodes.at(nodes.indexOf(nextCommaSeparator) - 1) + : nodes.at(-1)), }), }, + // Extract the content text covered by that range. + + withPropertyFromObject({ + object: '#firstExternalLink', + property: input.value('i'), + }), + + withPropertyFromObject({ + object: '#lastNodeInRange', + property: input.value('iEnd'), + }), + { - dependencies: ['annotation', '#startIndex', '#endIndex', '#rest'], + dependencies: [ + '#firstExternalLink.i', + '#lastNodeInRange.iEnd', + 'annotation', + ], + compute: (continuation, { + ['#firstExternalLink.i']: i, + ['#lastNodeInRange.iEnd']: iEnd, ['annotation']: annotation, - ['#startIndex']: startIndex, - ['#endIndex']: endIndex, - ['#rest']: rest, }) => continuation({ ['#sourceText']: - annotation.slice(startIndex, startIndex + endIndex) + - rest, + annotation.slice(i, iEnd), }), }, ], |