From 933af66aaaabd32acf30b7ff8236a59d29a37464 Mon Sep 17 00:00:00 2001
From: "(quasar) nebula" <qznebula@protonmail.com>
Date: Thu, 25 Jul 2024 13:19:39 -0300
Subject: data: withParsedContentEntries

---
 .../wiki-data/withParsedContentEntries.js          | 111 +++++++++++++++++++++
 1 file changed, 111 insertions(+)
 create mode 100644 src/data/composite/wiki-data/withParsedContentEntries.js

(limited to 'src/data/composite/wiki-data/withParsedContentEntries.js')

diff --git a/src/data/composite/wiki-data/withParsedContentEntries.js b/src/data/composite/wiki-data/withParsedContentEntries.js
new file mode 100644
index 00000000..2a9b3f6a
--- /dev/null
+++ b/src/data/composite/wiki-data/withParsedContentEntries.js
@@ -0,0 +1,111 @@
+import {input, templateCompositeFrom} from '#composite';
+import {stitchArrays} from '#sugar';
+import {isContentString, validateInstanceOf} from '#validators';
+
+import {withPropertiesFromList} from '#composite/data';
+
+export default templateCompositeFrom({
+  annotation: `withParsedContentEntries`,
+
+  inputs: {
+    // TODO: Is there any way to validate this input based on the *other*
+    // inputs proivded, i.e. regexes? This kind of just assumes the string
+    // has already been validated according to the form the regex expects,
+    // which *is* always the case (as used), but it seems a bit awkward.
+    from: input({validate: isContentString}),
+
+    caseSensitiveRegex: input({
+      validate: validateInstanceOf(RegExp),
+    }),
+  },
+
+  outputs: [
+    '#parsedContentEntryHeadings',
+    '#parsedContentEntryBodies',
+  ],
+
+  steps: () => [
+    {
+      dependencies: [
+        input('from'),
+        input('caseSensitiveRegex'),
+      ],
+
+      compute: (continuation, {
+        [input('from')]: commentaryText,
+        [input('caseSensitiveRegex')]: caseSensitiveRegex,
+      }) => continuation({
+        ['#rawMatches']:
+          Array.from(commentaryText.matchAll(caseSensitiveRegex)),
+      }),
+    },
+
+    withPropertiesFromList({
+      list: '#rawMatches',
+      properties: input.value([
+        '0', // The entire match as a string.
+        'groups',
+        'index',
+      ]),
+    }).outputs({
+      '#rawMatches.0': '#rawMatches.text',
+      '#rawMatches.groups': '#parsedContentEntryHeadings',
+      '#rawMatches.index': '#rawMatches.startIndex',
+    }),
+
+    {
+      dependencies: [
+        '#rawMatches.text',
+        '#rawMatches.startIndex',
+      ],
+
+      compute: (continuation, {
+        ['#rawMatches.text']: text,
+        ['#rawMatches.startIndex']: startIndex,
+      }) => continuation({
+        ['#rawMatches.endIndex']:
+          stitchArrays({text, startIndex})
+            .map(({text, startIndex}) => startIndex + text.length),
+      }),
+    },
+
+    {
+      dependencies: [
+        input('from'),
+        '#rawMatches.startIndex',
+        '#rawMatches.endIndex',
+      ],
+
+      compute: (continuation, {
+        [input('from')]: commentaryText,
+        ['#rawMatches.startIndex']: startIndex,
+        ['#rawMatches.endIndex']: endIndex,
+      }) => continuation({
+        ['#parsedContentEntryBodies']:
+          stitchArrays({startIndex, endIndex})
+            .map(({endIndex}, index, stitched) =>
+              (index === stitched.length - 1
+                ? commentaryText.slice(endIndex)
+                : commentaryText.slice(
+                    endIndex,
+                    stitched[index + 1].startIndex)))
+            .map(body => body.trim()),
+      }),
+    },
+
+    {
+      dependencies: [
+        '#parsedContentEntryHeadings',
+        '#parsedContentEntryBodies',
+      ],
+
+      compute: (continuation, {
+        ['#parsedContentEntryHeadings']: parsedContentEntryHeadings,
+        ['#parsedContentEntryBodies']: parsedContentEntryBodies,
+      }) => continuation({
+        ['#parsedContentEntryHeadings']: parsedContentEntryHeadings,
+        ['#parsedContentEntryBodies']: parsedContentEntryBodies,
+      })
+    }
+  ],
+});
-- 
cgit 1.3.0-6-gf8a5