From 933af66aaaabd32acf30b7ff8236a59d29a37464 Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Thu, 25 Jul 2024 13:19:39 -0300 Subject: data: withParsedContentEntries --- .../wiki-data/withParsedContentEntries.js | 111 +++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 src/data/composite/wiki-data/withParsedContentEntries.js (limited to 'src/data/composite/wiki-data/withParsedContentEntries.js') diff --git a/src/data/composite/wiki-data/withParsedContentEntries.js b/src/data/composite/wiki-data/withParsedContentEntries.js new file mode 100644 index 00000000..2a9b3f6a --- /dev/null +++ b/src/data/composite/wiki-data/withParsedContentEntries.js @@ -0,0 +1,111 @@ +import {input, templateCompositeFrom} from '#composite'; +import {stitchArrays} from '#sugar'; +import {isContentString, validateInstanceOf} from '#validators'; + +import {withPropertiesFromList} from '#composite/data'; + +export default templateCompositeFrom({ + annotation: `withParsedContentEntries`, + + inputs: { + // TODO: Is there any way to validate this input based on the *other* + // inputs proivded, i.e. regexes? This kind of just assumes the string + // has already been validated according to the form the regex expects, + // which *is* always the case (as used), but it seems a bit awkward. + from: input({validate: isContentString}), + + caseSensitiveRegex: input({ + validate: validateInstanceOf(RegExp), + }), + }, + + outputs: [ + '#parsedContentEntryHeadings', + '#parsedContentEntryBodies', + ], + + steps: () => [ + { + dependencies: [ + input('from'), + input('caseSensitiveRegex'), + ], + + compute: (continuation, { + [input('from')]: commentaryText, + [input('caseSensitiveRegex')]: caseSensitiveRegex, + }) => continuation({ + ['#rawMatches']: + Array.from(commentaryText.matchAll(caseSensitiveRegex)), + }), + }, + + withPropertiesFromList({ + list: '#rawMatches', + properties: input.value([ + '0', // The entire match as a string. + 'groups', + 'index', + ]), + }).outputs({ + '#rawMatches.0': '#rawMatches.text', + '#rawMatches.groups': '#parsedContentEntryHeadings', + '#rawMatches.index': '#rawMatches.startIndex', + }), + + { + dependencies: [ + '#rawMatches.text', + '#rawMatches.startIndex', + ], + + compute: (continuation, { + ['#rawMatches.text']: text, + ['#rawMatches.startIndex']: startIndex, + }) => continuation({ + ['#rawMatches.endIndex']: + stitchArrays({text, startIndex}) + .map(({text, startIndex}) => startIndex + text.length), + }), + }, + + { + dependencies: [ + input('from'), + '#rawMatches.startIndex', + '#rawMatches.endIndex', + ], + + compute: (continuation, { + [input('from')]: commentaryText, + ['#rawMatches.startIndex']: startIndex, + ['#rawMatches.endIndex']: endIndex, + }) => continuation({ + ['#parsedContentEntryBodies']: + stitchArrays({startIndex, endIndex}) + .map(({endIndex}, index, stitched) => + (index === stitched.length - 1 + ? commentaryText.slice(endIndex) + : commentaryText.slice( + endIndex, + stitched[index + 1].startIndex))) + .map(body => body.trim()), + }), + }, + + { + dependencies: [ + '#parsedContentEntryHeadings', + '#parsedContentEntryBodies', + ], + + compute: (continuation, { + ['#parsedContentEntryHeadings']: parsedContentEntryHeadings, + ['#parsedContentEntryBodies']: parsedContentEntryBodies, + }) => continuation({ + ['#parsedContentEntryHeadings']: parsedContentEntryHeadings, + ['#parsedContentEntryBodies']: parsedContentEntryBodies, + }) + } + ], +}); -- cgit 1.3.0-6-gf8a5