« get me outta code hell

data: withSourceText: parse nodes - hsmusic-wiki - HSMusic - static wiki software cataloguing collaborative creation
about summary refs log tree commit diff
path: root/src/data/composite/things/content/withSourceText.js
diff options
context:
space:
mode:
author(quasar) nebula <qznebula@protonmail.com>2025-05-01 14:01:01 -0300
committer(quasar) nebula <qznebula@protonmail.com>2025-05-06 12:29:05 -0300
commit57dd7dbdafba63b6edbd330b98072f09957a5492 (patch)
tree529779314f34e53ba116422fe68a1fa97c5e9cc8 /src/data/composite/things/content/withSourceText.js
parent9f14f1dfc7aa6c00c0cfa07577208ad1bdcc62f7 (diff)
data: withSourceText: parse nodes
The node-splitting behavior is itself about identical to what
postprocessExternalLinks still does, so it would be nice to
factor that out, but we haven't done so yet.

Some degree of "parse stuff out of a comma-divided bunch of
source text" is probably worth factoring out too, later.
Diffstat (limited to 'src/data/composite/things/content/withSourceText.js')
-rw-r--r--src/data/composite/things/content/withSourceText.js171
1 files changed, 139 insertions, 32 deletions
diff --git a/src/data/composite/things/content/withSourceText.js b/src/data/composite/things/content/withSourceText.js
index cfab64a8..7f03f97d 100644
--- a/src/data/composite/things/content/withSourceText.js
+++ b/src/data/composite/things/content/withSourceText.js
@@ -1,10 +1,70 @@
-import * as marked from 'marked';
-
 import {input, templateCompositeFrom} from '#composite';
-import {matchMarkdownLinks} from '#wiki-data';
+import {parseInput} from '#replacer';
 
 import {raiseOutputWithoutDependency} from '#composite/control-flow';
 
+import {
+  withLengthOfList,
+  withMappedList,
+  withNearbyItemFromList,
+  withPropertyFromObject,
+} from '#composite/data';
+
/**
 * Splits a single text node into a sequence of text nodes and
 * `comma-separator` nodes, one separator per match of a comma followed by
 * any number of spaces.
 *
 * Every yielded node carries `i` and `iEnd` offsets derived from the input
 * node's own `i`; text nodes additionally carry the matching slice of `data`.
 * Empty text segments (before a leading comma, between adjacent commas, or
 * after a trailing comma) are not yielded.
 *
 * NOTE(review): offsets assume `node.data` corresponds character-for-character
 * to the range `node.i` .. `node.iEnd` of the original input - confirm against
 * the parser that produces these nodes.
 *
 * @param {{i: number, iEnd: number, type: string, data: string}} node
 *   Text node to split.
 * @yields {{i: number, iEnd: number, type: string, data?: string}}
 *   Text and comma-separator nodes, in source order.
 */
function* splitTextNodeAroundCommas(node) {
  // Offset within node.data at which the pending text segment starts.
  let segmentStart = 0;

  for (const match of node.data.matchAll(/, */g)) {
    const {index} = match;
    const [{length}] = match;

    // Offsets are computed from the match positions rather than carried
    // along in a mutable node, so a text segment that follows a leading
    // comma or consecutive commas still starts at the right place.
    if (index > segmentStart) {
      yield {
        i: node.i + segmentStart,
        iEnd: node.i + index,
        type: 'text',
        data: node.data.slice(segmentStart, index),
      };
    }

    yield {
      i: node.i + index,
      iEnd: node.i + index + length,
      type: 'comma-separator',
    };

    segmentStart = index + length;
  }

  // Whatever follows the last separator (or the whole node, if there were
  // no separators) ends where the original node ended.
  if (segmentStart < node.data.length) {
    yield {
      i: node.i + segmentStart,
      iEnd: node.iEnd,
      type: 'text',
      data: node.data.slice(segmentStart),
    };
  }
}
+
/**
 * Walks a sequence of nodes, passing each one through unchanged unless it
 * is a text node whose data contains a comma; those are replaced by the
 * nodes yielded from splitTextNodeAroundCommas.
 *
 * @param {Iterable<object>} nodes - Nodes to walk, in order.
 * @yields {object} The original or split nodes, in order.
 */
function* splitTextNodesAroundCommas(nodes) {
  for (const current of nodes) {
    const needsSplit =
      current.type === 'text' &&
      current.data.includes(',');

    if (!needsSplit) {
      yield current;
      continue;
    }

    yield* splitTextNodeAroundCommas(current);
  }
}
+
 export default templateCompositeFrom({
   annotation: `withSourceText`,
 
@@ -16,60 +76,107 @@ export default templateCompositeFrom({
       output: input.value({'#sourceText': null}),
     }),
 
+    // Get the list of nodes including custom comma-separator nodes,
+    // and do some basic processing to make details about this list
+    // available later.
+
     {
       dependencies: ['annotation'],
       compute: (continuation, {
         ['annotation']: annotation,
       }) => continuation({
-        ['#matches']:
-          Array.from(matchMarkdownLinks(annotation, {marked})),
+        ['#nodes']:
+          Array.from(
+            splitTextNodesAroundCommas(
+              parseInput(annotation))),
       }),
     },
 
-    raiseOutputWithoutDependency({
-      dependency: '#matches',
-      output: input.value({'#sourceText': null}),
-      mode: input.value('empty'),
+    withLengthOfList({
+      list: '#nodes',
+    }),
+
+    withMappedList({
+      list: '#nodes',
+      map: input.value(node => node.type === 'comma-separator'),
+    }).outputs({
+      '#mappedList': '#commaSeparatorFilter',
     }),
 
+    // Identify the first and last nodes in the range running from
+    // the first external link, up until (not including) the following
+    // comma separator.
+
     {
-      dependencies: ['#matches'],
+      dependencies: ['#nodes'],
       compute: (continuation, {
-        ['#matches']: matches,
-      }) =>
-        continuation({
-          ['#startIndex']:
-            matches.at(0).index,
-
-          ['#endIndex']:
-            matches.at(-1).index +
-            matches.at(-1).length,
-        }),
+        ['#nodes']: nodes,
+      }) => continuation({
+        ['#firstExternalLink']:
+          nodes.find(node => node.type === 'external-link'),
+      }),
     },
 
+    raiseOutputWithoutDependency({
+      dependency: '#firstExternalLink',
+      output: input.value({'#sourceText': null}),
+    }),
+
+    withNearbyItemFromList({
+      item: '#firstExternalLink',
+      list: '#nodes',
+      offset: input.value(+1),
+
+      filter: '#commaSeparatorFilter',
+    }).outputs({
+      '#nearbyItem': '#nextCommaSeparator',
+    }),
+
     {
-      dependencies: ['annotation', '#endIndex'],
+      dependencies: [
+        '#firstExternalLink',
+        '#nextCommaSeparator',
+        '#nodes',
+      ],
+
       compute: (continuation, {
-        ['annotation']: annotation,
-        ['#endIndex']: endIndex,
+        ['#firstExternalLink']: firstExternalLink,
+        ['#nextCommaSeparator']: nextCommaSeparator,
+        ['#nodes']: nodes,
       }) => continuation({
-        ['#rest']:
-          annotation.slice(endIndex)
-            .match(/^[^,]*(?=,|$)/),
+        ['#lastNodeInRange']:
+          (nextCommaSeparator
+            ? nodes.at(nodes.indexOf(nextCommaSeparator) - 1)
+            : nodes.at(-1)),
       }),
     },
 
+    // Extract the content text covered by that range.
+
+    withPropertyFromObject({
+      object: '#firstExternalLink',
+      property: input.value('i'),
+    }),
+
+    withPropertyFromObject({
+      object: '#lastNodeInRange',
+      property: input.value('iEnd'),
+    }),
+
     {
-      dependencies: ['annotation', '#startIndex', '#endIndex', '#rest'],
+      dependencies: [
+        '#firstExternalLink.i',
+        '#lastNodeInRange.iEnd',
+        'annotation',
+      ],
+
       compute: (continuation, {
+        ['#firstExternalLink.i']: i,
+        ['#lastNodeInRange.iEnd']: iEnd,
         ['annotation']: annotation,
-        ['#startIndex']: startIndex,
-        ['#endIndex']: endIndex,
-        ['#rest']: rest,
       }) => continuation({
         ['#sourceText']:
-          annotation.slice(startIndex, startIndex + endIndex) +
-          rest,
+          annotation.slice(i, iEnd),
       }),
     },
   ],