data: ContentEntry.annotationParts

Also fixes an error in splitting text around commas.
author: (quasar) nebula <qznebula@protonmail.com> 2025-05-01 14:49:30 -0300
committer: (quasar) nebula <qznebula@protonmail.com> 2025-05-06 12:29:05 -0300
commit: ac5e149c7220a473068536cd15e52fa0fbfaf2fe (patch)
tree: 77c08249455b5a69c02526e118e97aeb1321b01e
parent: 57dd7dbdafba63b6edbd330b98072f09957a5492 (diff)
4 files changed, 250 insertions, 149 deletions
diff --git a/src/data/composite/things/content/index.js b/src/data/composite/things/content/index.js
index 1ee188c5..71133ce0 100644
--- a/src/data/composite/things/content/index.js
+++ b/src/data/composite/things/content/index.js
@@ -1,2 +1,3 @@
+export {default as withAnnotationParts} from './withAnnotationParts.js';
 export {default as withSourceText} from './withSourceText.js';
 export {default as withWebArchiveDate} from './withWebArchiveDate.js';
diff --git a/src/data/composite/things/content/withAnnotationParts.js b/src/data/composite/things/content/withAnnotationParts.js
new file mode 100644
index 00000000..a7b2d1f0
--- /dev/null
+++ b/src/data/composite/things/content/withAnnotationParts.js
@@ -0,0 +1,221 @@
+import {input, templateCompositeFrom} from '#composite';
+import {parseInput} from '#replacer';
+import {transposeArrays} from '#sugar';
+import {is} from '#validators';
+
+import {raiseOutputWithoutDependency} from '#composite/control-flow';
+
+import {
+  withFilteredList,
+  withMappedList,
+  withPropertyFromList,
+  withUnflattenedList,
+} from '#composite/data';
+
+// Breaks a single text node apart at its commas. Every comma, along with
+// any spaces directly after it, is yielded as a 'comma-separator' node;
+// each non-empty run of text between separators is yielded as a new
+// 'text' node. Positions (i, iEnd) are offset from the node's own i.
+function* splitTextNodeAroundCommas(node) {
+  const {i: base, data: source} = node;
+
+  // Offset within source where the next text run begins.
+  let cursor = 0;
+
+  for (const match of source.matchAll(/, */g)) {
+    const commaStart = match.index;
+    const commaEnd = commaStart + match[0].length;
+
+    // Text leading up to this comma, if there is any.
+    if (commaStart > cursor) {
+      yield {
+        i: base + cursor,
+        iEnd: base + commaStart,
+        type: 'text',
+        data: source.slice(cursor, commaStart),
+      };
+    }
+
+    // The separator itself: the comma plus its trailing spaces.
+    yield {
+      i: base + commaStart,
+      iEnd: base + commaEnd,
+      type: 'comma-separator',
+    };
+
+    cursor = commaEnd;
+  }
+
+  // Whatever follows the last comma ends where the original node ends.
+  if (cursor < source.length) {
+    yield {
+      i: base + cursor,
+      iEnd: node.iEnd,
+      type: 'text',
+      data: source.slice(cursor),
+    };
+  }
+}
+
+// Yields every node unchanged, except text nodes containing a comma,
+// which are replaced by the pieces from splitTextNodeAroundCommas.
+function* splitTextNodesAroundCommas(nodes) {
+  for (const node of nodes) {
+    const hasComma = node.type === 'text' && node.data.includes(',');
+
+    yield* (hasComma ? splitTextNodeAroundCommas(node) : [node]);
+  }
+}
+
+// Splits the 'annotation' dependency into parts at the commas found in its
+// text nodes, and outputs them as '#annotationParts': lists of nodes when
+// mode is 'nodes', or the matching slices of the annotation text otherwise.
+export default templateCompositeFrom({
+  annotation: `withAnnotationParts`,
+  inputs: {
+    mode: input({
+      validate: is('strings', 'nodes'),
+    }),
+  },
+  outputs: ['#annotationParts'],
+  steps: () => [
+    raiseOutputWithoutDependency({
+      dependency: 'annotation',
+      output: input.value({'#annotationParts': []}),
+    }),
+
+    // Get the list of nodes including custom comma-separator nodes.
+    // (Only text nodes are split; commas inside other node types are kept.)
+    {
+      dependencies: ['annotation'],
+      compute: (continuation, {
+        ['annotation']: annotation,
+      }) => continuation({
+        ['#nodes']:
+          Array.from(
+            splitTextNodesAroundCommas(
+              parseInput(annotation))),
+      }),
+    },
+
+    // Join the nodes into arrays for each range between comma separators,
+    // excluding the comma-separator nodes themselves.
+    // Separators are nulled out; their indices (plus 0) drive the unflatten.
+    withMappedList({
+      list: '#nodes',
+      map: input.value(node => node.type === 'comma-separator'),
+    }).outputs({
+      '#mappedList': '#commaSeparatorFilter',
+    }),
+
+    withMappedList({
+      list: '#commaSeparatorFilter',
+      filter: '#commaSeparatorFilter',
+      map: input.value((_node, index) => index),
+    }),
+
+    withFilteredList({
+      list: '#mappedList',
+      filter: '#commaSeparatorFilter',
+    }).outputs({
+      '#filteredList': '#commaSeparatorIndices',
+    }),
+
+    {
+      dependencies: ['#nodes', '#commaSeparatorFilter'],
+
+      compute: (continuation, {
+        ['#nodes']: nodes,
+        ['#commaSeparatorFilter']: commaSeparatorFilter,
+      }) => continuation({
+        ['#nodes']:
+          nodes.map((node, index) =>
+            (commaSeparatorFilter[index]
+              ? null
+              : node)),
+      }),
+    },
+
+    {
+      dependencies: ['#commaSeparatorIndices'],
+      compute: (continuation, {
+        ['#commaSeparatorIndices']: commaSeparatorIndices,
+      }) => continuation({
+        ['#unflattenIndices']:
+          [0, ...commaSeparatorIndices],
+      }),
+    },
+
+    withUnflattenedList({
+      list: '#nodes',
+      indices: '#unflattenIndices',
+    }).outputs({
+      '#unflattenedList': '#nodeLists',
+    }),
+
+    // Raise output now, if we're looking for node lists.
+    // Otherwise, continue on to turn each node list into a string.
+    {
+      dependencies: ['#nodeLists', input('mode')],
+      compute: (continuation, {
+        ['#nodeLists']: nodeLists,
+        [input('mode')]: mode,
+      }) =>
+        (mode === 'nodes'
+          ? continuation.raiseOutput({'#annotationParts': nodeLists})
+          : continuation()),
+    },
+
+    // Get the start of the first node, and the end of the last node,
+    // within each list.
+    // (`i` and `iEnd` are used below as offsets into the annotation text.)
+    {
+      dependencies: ['#nodeLists'],
+
+      compute: (continuation, {
+        ['#nodeLists']: nodeLists,
+      }) => continuation({
+        ['#firstNodes']:
+          nodeLists.map(list => list.at(0)),
+
+        ['#lastNodes']:
+          nodeLists.map(list => list.at(-1)),
+      }),
+    },
+
+    withPropertyFromList({
+      list: '#firstNodes',
+      property: input.value('i'),
+    }).outputs({
+      '#firstNodes.i': '#startIndices',
+    }),
+
+    withPropertyFromList({
+      list: '#lastNodes',
+      property: input.value('iEnd'),
+    }).outputs({
+      '#lastNodes.iEnd': '#endIndices',
+    }),
+
+    // Slice the content text within the bounds of each node list.
+    // Each start index is paired with the end index at the same position.
+    {
+      dependencies: [
+        'annotation',
+        '#startIndices',
+        '#endIndices',
+      ],
+
+      compute: (continuation, {
+        ['annotation']: annotation,
+        ['#startIndices']: startIndices,
+        ['#endIndices']: endIndices,
+      }) => continuation({
+        ['#annotationParts']:
+          transposeArrays([startIndices, endIndices])
+            .map(([start, end]) =>
+              annotation.slice(start, end)),
+      }),
+    },
+  ],
+});
diff --git a/src/data/composite/things/content/withSourceText.js b/src/data/composite/things/content/withSourceText.js
index 7f03f97d..487b3763 100644
--- a/src/data/composite/things/content/withSourceText.js
+++ b/src/data/composite/things/content/withSourceText.js
@@ -3,67 +3,7 @@ import {parseInput} from '#replacer';
 
 import {raiseOutputWithoutDependency} from '#composite/control-flow';
 
-import {
-  withLengthOfList,
-  withMappedList,
-  withNearbyItemFromList,
-  withPropertyFromObject,
-} from '#composite/data';
-
-function* splitTextNodeAroundCommas(node) {
-  let textNode = {
-    i: node.i,
-    iEnd: null,
-    type: 'text',
-    data: '',
-  };
-
-  let parseFrom = 0;
-  for (const match of node.data.matchAll(/, */g)) {
-    const {index} = match, [{length}] = match;
-
-    textNode.data += node.data.slice(parseFrom, index);
-
-    if (textNode.data) {
-      textNode.iEnd = textNode.i + textNode.data.length;
-      yield textNode;
-
-      textNode = {
-        i: node.i + index + length,
-        iEnd: null,
-        type: 'text',
-        data: '',
-      };
-    }
-
-    yield {
-      i: node.i + index,
-      iEnd: node.i + index + length,
-      type: 'comma-separator',
-    };
-
-    parseFrom = index + length;
-  }
-
-  if (parseFrom !== node.data.length) {
-    textNode.data += node.data.slice(parseFrom);
-    textNode.iEnd = node.iEnd;
-  }
-
-  if (textNode.data) {
-    yield textNode;
-  }
-}
-
-function* splitTextNodesAroundCommas(nodes) {
-  for (const node of nodes) {
-    if (node.type === 'text' && node.data.includes(',')) {
-      yield* splitTextNodeAroundCommas(node);
-    } else {
-      yield node;
-    }
-  }
-}
+import withAnnotationParts from './withAnnotationParts.js';
 
 export default templateCompositeFrom({
   annotation: `withSourceText`,
@@ -71,112 +11,42 @@ export default templateCompositeFrom({
   outputs: ['#sourceText'],
 
   steps: () => [
-    raiseOutputWithoutDependency({
-      dependency: 'annotation',
-      output: input.value({'#sourceText': null}),
+    withAnnotationParts({
+      mode: input.value('nodes'),
     }),
 
-    // Get the list of notes including custom comma-separator nodes,
-    // and do some basic processing to make details about this list
-    // available later.
-
-    {
-      dependencies: ['annotation'],
-      compute: (continuation, {
-        ['annotation']: annotation,
-      }) => continuation({
-        ['#nodes']:
-          Array.from(
-            splitTextNodesAroundCommas(
-              parseInput(annotation))),
-      }),
-    },
-
-    withLengthOfList({
-      list: '#nodes',
-    }),
-
-    withMappedList({
-      list: '#nodes',
-      map: input.value(node => node.type === 'comma-separator'),
-    }).outputs({
-      '#mappedList': '#commaSeparatorFilter',
-    }),
-
-    // Identify the first and last nodes in the range running from
-    // the first external link, up til (not including) the following
-    // comma separator.
-
-    {
-      dependencies: ['#nodes'],
-      compute: (continuation, {
-        ['#nodes']: nodes,
-      }) => continuation({
-        ['#firstExternalLink']:
-          nodes.find(node => node.type === 'external-link'),
-      }),
-    },
-
     raiseOutputWithoutDependency({
-      dependency: '#firstExternalLink',
+      dependency: '#annotationParts',
       output: input.value({'#sourceText': null}),
     }),
 
-    withNearbyItemFromList({
-      item: '#firstExternalLink',
-      list: '#nodes',
-      offset: input.value(+1),
-
-      filter: '#commaSeparatorFilter',
-    }).outputs({
-      '#nearbyItem': '#nextCommaSeparator',
-    }),
-
     {
-      dependencies: [
-        '#firstExternalLink',
-        '#nextCommaSeparator',
-        '#nodes',
-      ],
-
+      dependencies: ['#annotationParts'],
       compute: (continuation, {
-        ['#firstExternalLink']: firstExternalLink,
-        ['#nextCommaSeparator']: nextCommaSeparator,
-        ['#nodes']: nodes,
+        ['#annotationParts']: annotationParts,
       }) => continuation({
-        ['#lastNodeInRange']:
-          (nextCommaSeparator
-            ? nodes.at(nodes.indexOf(nextCommaSeparator) - 1)
-            : nodes.at(-1)),
+        ['#firstPartWithExternalLink']:
+          annotationParts
+            .find(nodes => nodes
+              .some(node => node.type === 'external-link')),
       }),
     },
 
-    // Extract the content text covered by that range.
-
-    withPropertyFromObject({
-      object: '#firstExternalLink',
-      property: input.value('i'),
-    }),
-
-    withPropertyFromObject({
-      object: '#lastNodeInRange',
-      property: input.value('iEnd'),
+    raiseOutputWithoutDependency({
+      dependency: '#firstPartWithExternalLink',
+      output: input.value({'#sourceText': null}),
     }),
 
     {
-      dependencies: [
-        '#firstExternalLink.i',
-        '#lastNodeInRange.iEnd',
-        'annotation',
-      ],
-
+      dependencies: ['annotation', '#firstPartWithExternalLink'],
       compute: (continuation, {
-        ['#firstExternalLink.i']: i,
-        ['#lastNodeInRange.iEnd']: iEnd,
         ['annotation']: annotation,
+        ['#firstPartWithExternalLink']: nodes,
       }) => continuation({
         ['#sourceText']:
-          annotation.slice(i, iEnd),
+          annotation.slice(
+            nodes.at(0).i,
+            nodes.at(-1).iEnd),
       }),
     },
   ],
diff --git a/src/data/things/content.js b/src/data/things/content.js
index d68fd5be..660d7020 100644
--- a/src/data/things/content.js
+++ b/src/data/things/content.js
@@ -15,7 +15,8 @@ import {
   withResultOfAvailabilityCheck,
 } from '#composite/control-flow';
 
-import {withWebArchiveDate, withSourceText} from '#composite/things/content';
+import {withAnnotationParts, withSourceText, withWebArchiveDate}
+  from '#composite/things/content';
 
 export class ContentEntry extends Thing {
   static [Thing.getPropertyDescriptors] = ({Artist}) => ({
@@ -100,6 +101,14 @@ export class ContentEntry extends Thing {
 
     // Expose only
 
+    annotationParts: [
+      withAnnotationParts({
+        mode: input.value('strings'),
+      }),
+
+      exposeDependency({dependency: '#annotationParts'}),
+    ],
+
     sourceText: [
       withSourceText(),
       exposeDependency({dependency: '#sourceText'}),
author	(quasar) nebula <qznebula@protonmail.com>	2025-05-01 14:49:30 -0300
committer	(quasar) nebula <qznebula@protonmail.com>	2025-05-06 12:29:05 -0300
commit	ac5e149c7220a473068536cd15e52fa0fbfaf2fe (patch)
tree	77c08249455b5a69c02526e118e97aeb1321b01e
parent	57dd7dbdafba63b6edbd330b98072f09957a5492 (diff)