diff options
author | (quasar) nebula <qznebula@protonmail.com> | 2024-07-25 13:19:39 -0300 |
---|---|---|
committer | (quasar) nebula <qznebula@protonmail.com> | 2025-04-13 22:54:14 -0300 |
commit | 933af66aaaabd32acf30b7ff8236a59d29a37464 (patch) | |
tree | d6a409d5208984b1f74fb876cffb2703e16d264c /src/data/composite/wiki-data/withParsedContentEntries.js | |
parent | c8a54326365571adc5ef1816158b02eec78701cf (diff) |
data: withParsedContentEntries
Diffstat (limited to 'src/data/composite/wiki-data/withParsedContentEntries.js')
-rw-r--r-- | src/data/composite/wiki-data/withParsedContentEntries.js | 111 |
1 files changed, 111 insertions, 0 deletions
// File: src/data/composite/wiki-data/withParsedContentEntries.js

import {input, templateCompositeFrom} from '#composite';
import {stitchArrays} from '#sugar';
import {isContentString, validateInstanceOf} from '#validators';

import {withPropertiesFromList} from '#composite/data';

// Splits a content string into "entries" using a heading regex.
//
// Every match of `caseSensitiveRegex` inside `from` marks the start of an
// entry. For each match this composite provides:
//
//   #parsedContentEntryHeadings - the match's `groups` value
//   #parsedContentEntryBodies   - the trimmed text between the end of this
//                                 match and the start of the next match (or
//                                 the end of the string, for the last match)
//
// Any text that appears before the first match is not part of any body.
export default templateCompositeFrom({
  annotation: `withParsedContentEntries`,

  inputs: {
    // TODO: Is there any way to validate this input based on the *other*
    // inputs provided, i.e. regexes? This kind of just assumes the string
    // has already been validated according to the form the regex expects,
    // which *is* always the case (as used), but it seems a bit awkward.
    from: input({validate: isContentString}),

    // NOTE(review): this regex is handed straight to String#matchAll, which
    // throws a TypeError for a RegExp without the global flag - the
    // validator here only checks that it is a RegExp instance.
    caseSensitiveRegex: input({
      validate: validateInstanceOf(RegExp),
    }),
  },

  outputs: [
    '#parsedContentEntryHeadings',
    '#parsedContentEntryBodies',
  ],

  steps: () => [
    // Collect every regex match against the source text.
    {
      dependencies: [
        input('from'),
        input('caseSensitiveRegex'),
      ],

      compute(continuation, {
        [input('from')]: sourceText,
        [input('caseSensitiveRegex')]: headingRegex,
      }) {
        const matches = [...sourceText.matchAll(headingRegex)];

        return continuation({
          ['#rawMatches']: matches,
        });
      },
    },

    // Pull the interesting fields off of each match into parallel lists.
    // The `groups` list is exposed directly as the headings output.
    withPropertiesFromList({
      list: '#rawMatches',
      properties: input.value([
        '0', // The entire match as a string.
        'groups',
        'index',
      ]),
    }).outputs({
      '#rawMatches.0': '#rawMatches.text',
      '#rawMatches.groups': '#parsedContentEntryHeadings',
      '#rawMatches.index': '#rawMatches.startIndex',
    }),

    // A match ends where it started plus the length of the matched text.
    {
      dependencies: [
        '#rawMatches.text',
        '#rawMatches.startIndex',
      ],

      compute(continuation, {
        ['#rawMatches.text']: matchTexts,
        ['#rawMatches.startIndex']: matchStarts,
      }) {
        const matchEnds =
          stitchArrays({text: matchTexts, startIndex: matchStarts})
            .map(match => match.startIndex + match.text.length);

        return continuation({
          ['#rawMatches.endIndex']: matchEnds,
        });
      },
    },

    // Each body runs from the end of its own match up to the start of the
    // following match; the last body runs through the end of the text.
    {
      dependencies: [
        input('from'),
        '#rawMatches.startIndex',
        '#rawMatches.endIndex',
      ],

      compute(continuation, {
        [input('from')]: sourceText,
        ['#rawMatches.startIndex']: matchStarts,
        ['#rawMatches.endIndex']: matchEnds,
      }) {
        const spans =
          stitchArrays({startIndex: matchStarts, endIndex: matchEnds});

        const finalPosition = spans.length - 1;

        const bodies =
          spans.map((span, position) => {
            let rawBody;

            if (position === finalPosition) {
              rawBody = sourceText.slice(span.endIndex);
            } else {
              rawBody =
                sourceText.slice(
                  span.endIndex,
                  spans[position + 1].startIndex);
            }

            return rawBody.trim();
          });

        return continuation({
          ['#parsedContentEntryBodies']: bodies,
        });
      },
    },

    // Re-emits both outputs unchanged. Presumably the final step is what
    // hands the declared outputs back to the caller - TODO confirm against
    // the composite framework before removing.
    {
      dependencies: [
        '#parsedContentEntryHeadings',
        '#parsedContentEntryBodies',
      ],

      compute(continuation, {
        ['#parsedContentEntryHeadings']: headings,
        ['#parsedContentEntryBodies']: bodies,
      }) {
        return continuation({
          ['#parsedContentEntryHeadings']: headings,
          ['#parsedContentEntryBodies']: bodies,
        });
      },
    },
  ],
});