data: withParsedContentEntries

author: (quasar) nebula <qznebula@protonmail.com> 2024-07-25 13:19:39 -0300
committer: (quasar) nebula <qznebula@protonmail.com> 2025-04-13 22:54:14 -0300
commit: 933af66aaaabd32acf30b7ff8236a59d29a37464 (patch)
tree: d6a409d5208984b1f74fb876cffb2703e16d264c /src/data
parent: c8a54326365571adc5ef1816158b02eec78701cf (diff)
3 files changed, 119 insertions, 65 deletions
diff --git a/src/data/composite/wiki-data/index.js b/src/data/composite/wiki-data/index.js
index ee7411f2..d2a60935 100644
--- a/src/data/composite/wiki-data/index.js
+++ b/src/data/composite/wiki-data/index.js
@@ -17,6 +17,7 @@ export {default as withContributionListSums} from './withContributionListSums.js
 export {default as withCoverArtDate} from './withCoverArtDate.js';
 export {default as withDirectory} from './withDirectory.js';
 export {default as withParsedCommentaryEntries} from './withParsedCommentaryEntries.js';
+export {default as withParsedContentEntries} from './withParsedContentEntries.js';
 export {default as withRecontextualizedContributionList} from './withRecontextualizedContributionList.js';
 export {default as withRedatedContributionList} from './withRedatedContributionList.js';
 export {default as withResolvedAnnotatedReferenceList} from './withResolvedAnnotatedReferenceList.js';
diff --git a/src/data/composite/wiki-data/withParsedCommentaryEntries.js b/src/data/composite/wiki-data/withParsedCommentaryEntries.js
index 9bf4278c..885ea28d 100644
--- a/src/data/composite/wiki-data/withParsedCommentaryEntries.js
+++ b/src/data/composite/wiki-data/withParsedCommentaryEntries.js
@@ -11,6 +11,7 @@ import {
 } from '#composite/data';
 
 import inputSoupyFind from './inputSoupyFind.js';
+import withParsedContentEntries from './withParsedContentEntries.js';
 import withResolvedReferenceList from './withResolvedReferenceList.js';
 
 export default templateCompositeFrom({
@@ -23,72 +24,13 @@ export default templateCompositeFrom({
   outputs: ['#parsedCommentaryEntries'],
 
   steps: () => [
-    {
-      dependencies: [input('from')],
-
-      compute: (continuation, {
-        [input('from')]: commentaryText,
-      }) => continuation({
-        ['#rawMatches']:
-          Array.from(commentaryText.matchAll(commentaryRegexCaseSensitive)),
-      }),
-    },
-
-    withPropertiesFromList({
-      list: '#rawMatches',
-      properties: input.value([
-        '0', // The entire match as a string.
-        'groups',
-        'index',
-      ]),
-    }).outputs({
-      '#rawMatches.0': '#rawMatches.text',
-      '#rawMatches.groups': '#rawMatches.groups',
-      '#rawMatches.index': '#rawMatches.startIndex',
+    withParsedContentEntries({
+      from: input('from'),
+      caseSensitiveRegex: input.value(commentaryRegexCaseSensitive),
     }),
 
-    {
-      dependencies: [
-        '#rawMatches.text',
-        '#rawMatches.startIndex',
-      ],
-
-      compute: (continuation, {
-        ['#rawMatches.text']: text,
-        ['#rawMatches.startIndex']: startIndex,
-      }) => continuation({
-        ['#rawMatches.endIndex']:
-          stitchArrays({text, startIndex})
-            .map(({text, startIndex}) => startIndex + text.length),
-      }),
-    },
-
-    {
-      dependencies: [
-        input('from'),
-        '#rawMatches.startIndex',
-        '#rawMatches.endIndex',
-      ],
-
-      compute: (continuation, {
-        [input('from')]: commentaryText,
-        ['#rawMatches.startIndex']: startIndex,
-        ['#rawMatches.endIndex']: endIndex,
-      }) => continuation({
-        ['#entries.body']:
-          stitchArrays({startIndex, endIndex})
-            .map(({endIndex}, index, stitched) =>
-              (index === stitched.length - 1
-                ? commentaryText.slice(endIndex)
-                : commentaryText.slice(
-                    endIndex,
-                    stitched[index + 1].startIndex)))
-            .map(body => body.trim()),
-      }),
-    },
-
     withPropertiesFromList({
-      list: '#rawMatches.groups',
+      list: '#parsedContentEntryHeadings',
       prefix: input.value('#entries'),
       properties: input.value([
         'artistReferences',
@@ -228,7 +170,7 @@ export default templateCompositeFrom({
         '#entries.dateKind',
         '#entries.accessDate',
         '#entries.accessKind',
-        '#entries.body',
+        '#parsedContentEntryBodies',
       ],
 
       compute: (continuation, {
@@ -240,7 +182,7 @@ export default templateCompositeFrom({
         ['#entries.dateKind']: dateKind,
         ['#entries.accessDate']: accessDate,
         ['#entries.accessKind']: accessKind,
-        ['#entries.body']: body,
+        ['#parsedContentEntryBodies']: body,
       }) => continuation({
         ['#parsedCommentaryEntries']:
           stitchArrays({
diff --git a/src/data/composite/wiki-data/withParsedContentEntries.js b/src/data/composite/wiki-data/withParsedContentEntries.js
new file mode 100644
index 00000000..2a9b3f6a
--- /dev/null
+++ b/src/data/composite/wiki-data/withParsedContentEntries.js
@@ -0,0 +1,111 @@
+import {input, templateCompositeFrom} from '#composite';
+import {stitchArrays} from '#sugar';
+import {isContentString, validateInstanceOf} from '#validators';
+
+import {withPropertiesFromList} from '#composite/data';
+
+export default templateCompositeFrom({
+  annotation: `withParsedContentEntries`,
+
+  inputs: {
+    // TODO: Is there any way to validate this input based on the *other*
+    // inputs provided, i.e. regexes? This kind of just assumes the string
+    // has already been validated according to the form the regex expects,
+    // which *is* always the case (as used), but it seems a bit awkward.
+    from: input({validate: isContentString}),
+
+    caseSensitiveRegex: input({
+      validate: validateInstanceOf(RegExp),
+    }),
+  },
+
+  outputs: [
+    '#parsedContentEntryHeadings',
+    '#parsedContentEntryBodies',
+  ],
+
+  steps: () => [
+    {
+      dependencies: [
+        input('from'),
+        input('caseSensitiveRegex'),
+      ],
+
+      compute: (continuation, {
+        [input('from')]: commentaryText,
+        [input('caseSensitiveRegex')]: caseSensitiveRegex,
+      }) => continuation({
+        ['#rawMatches']:
+          Array.from(commentaryText.matchAll(caseSensitiveRegex)),
+      }),
+    },
+
+    withPropertiesFromList({
+      list: '#rawMatches',
+      properties: input.value([
+        '0', // The entire match as a string.
+        'groups',
+        'index',
+      ]),
+    }).outputs({
+      '#rawMatches.0': '#rawMatches.text',
+      '#rawMatches.groups': '#parsedContentEntryHeadings',
+      '#rawMatches.index': '#rawMatches.startIndex',
+    }),
+
+    {
+      dependencies: [
+        '#rawMatches.text',
+        '#rawMatches.startIndex',
+      ],
+
+      compute: (continuation, {
+        ['#rawMatches.text']: text,
+        ['#rawMatches.startIndex']: startIndex,
+      }) => continuation({
+        ['#rawMatches.endIndex']:
+          stitchArrays({text, startIndex})
+            .map(({text, startIndex}) => startIndex + text.length),
+      }),
+    },
+
+    {
+      dependencies: [
+        input('from'),
+        '#rawMatches.startIndex',
+        '#rawMatches.endIndex',
+      ],
+
+      compute: (continuation, {
+        [input('from')]: commentaryText,
+        ['#rawMatches.startIndex']: startIndex,
+        ['#rawMatches.endIndex']: endIndex,
+      }) => continuation({
+        ['#parsedContentEntryBodies']:
+          stitchArrays({startIndex, endIndex})
+            .map(({endIndex}, index, stitched) =>
+              (index === stitched.length - 1
+                ? commentaryText.slice(endIndex)
+                : commentaryText.slice(
+                    endIndex,
+                    stitched[index + 1].startIndex)))
+            .map(body => body.trim()),
+      }),
+    },
+
+    {
+      dependencies: [
+        '#parsedContentEntryHeadings',
+        '#parsedContentEntryBodies',
+      ],
+
+      compute: (continuation, {
+        ['#parsedContentEntryHeadings']: parsedContentEntryHeadings,
+        ['#parsedContentEntryBodies']: parsedContentEntryBodies,
+      }) => continuation({
+        ['#parsedContentEntryHeadings']: parsedContentEntryHeadings,
+        ['#parsedContentEntryBodies']: parsedContentEntryBodies,
+      })
+    }
+  ],
+});
author	(quasar) nebula <qznebula@protonmail.com>	2024-07-25 13:19:39 -0300
committer	(quasar) nebula <qznebula@protonmail.com>	2025-04-13 22:54:14 -0300
commit	933af66aaaabd32acf30b7ff8236a59d29a37464 (patch)
tree	d6a409d5208984b1f74fb876cffb2703e16d264c /src/data
parent	c8a54326365571adc5ef1816158b02eec78701cf (diff)