diff options
author | (quasar) nebula <qznebula@protonmail.com> | 2024-07-25 13:19:39 -0300 |
---|---|---|
committer | (quasar) nebula <qznebula@protonmail.com> | 2025-04-13 22:54:14 -0300 |
commit | 933af66aaaabd32acf30b7ff8236a59d29a37464 (patch) | |
tree | d6a409d5208984b1f74fb876cffb2703e16d264c /src/data | |
parent | c8a54326365571adc5ef1816158b02eec78701cf (diff) |
data: withParsedContentEntries
Diffstat (limited to 'src/data')
-rw-r--r-- | src/data/composite/wiki-data/index.js | 1 | ||||
-rw-r--r-- | src/data/composite/wiki-data/withParsedCommentaryEntries.js | 72 | ||||
-rw-r--r-- | src/data/composite/wiki-data/withParsedContentEntries.js | 111 |
3 files changed, 119 insertions, 65 deletions
diff --git a/src/data/composite/wiki-data/index.js b/src/data/composite/wiki-data/index.js index ee7411f2..d2a60935 100644 --- a/src/data/composite/wiki-data/index.js +++ b/src/data/composite/wiki-data/index.js @@ -17,6 +17,7 @@ export {default as withContributionListSums} from './withContributionListSums.js export {default as withCoverArtDate} from './withCoverArtDate.js'; export {default as withDirectory} from './withDirectory.js'; export {default as withParsedCommentaryEntries} from './withParsedCommentaryEntries.js'; +export {default as withParsedContentEntries} from './withParsedContentEntries.js'; export {default as withRecontextualizedContributionList} from './withRecontextualizedContributionList.js'; export {default as withRedatedContributionList} from './withRedatedContributionList.js'; export {default as withResolvedAnnotatedReferenceList} from './withResolvedAnnotatedReferenceList.js'; diff --git a/src/data/composite/wiki-data/withParsedCommentaryEntries.js b/src/data/composite/wiki-data/withParsedCommentaryEntries.js index 9bf4278c..885ea28d 100644 --- a/src/data/composite/wiki-data/withParsedCommentaryEntries.js +++ b/src/data/composite/wiki-data/withParsedCommentaryEntries.js @@ -11,6 +11,7 @@ import { } from '#composite/data'; import inputSoupyFind from './inputSoupyFind.js'; +import withParsedContentEntries from './withParsedContentEntries.js'; import withResolvedReferenceList from './withResolvedReferenceList.js'; export default templateCompositeFrom({ @@ -23,72 +24,13 @@ export default templateCompositeFrom({ outputs: ['#parsedCommentaryEntries'], steps: () => [ - { - dependencies: [input('from')], - - compute: (continuation, { - [input('from')]: commentaryText, - }) => continuation({ - ['#rawMatches']: - Array.from(commentaryText.matchAll(commentaryRegexCaseSensitive)), - }), - }, - - withPropertiesFromList({ - list: '#rawMatches', - properties: input.value([ - '0', // The entire match as a string. - 'groups', - 'index', - ]), - }).outputs({ - '#rawMatches.0': '#rawMatches.text', - '#rawMatches.groups': '#rawMatches.groups', - '#rawMatches.index': '#rawMatches.startIndex', + withParsedContentEntries({ + from: input('from'), + caseSensitiveRegex: input.value(commentaryRegexCaseSensitive), }), - { - dependencies: [ - '#rawMatches.text', - '#rawMatches.startIndex', - ], - - compute: (continuation, { - ['#rawMatches.text']: text, - ['#rawMatches.startIndex']: startIndex, - }) => continuation({ - ['#rawMatches.endIndex']: - stitchArrays({text, startIndex}) - .map(({text, startIndex}) => startIndex + text.length), - }), - }, - - { - dependencies: [ - input('from'), - '#rawMatches.startIndex', - '#rawMatches.endIndex', - ], - - compute: (continuation, { - [input('from')]: commentaryText, - ['#rawMatches.startIndex']: startIndex, - ['#rawMatches.endIndex']: endIndex, - }) => continuation({ - ['#entries.body']: - stitchArrays({startIndex, endIndex}) - .map(({endIndex}, index, stitched) => - (index === stitched.length - 1 - ? commentaryText.slice(endIndex) - : commentaryText.slice( - endIndex, - stitched[index + 1].startIndex))) - .map(body => body.trim()), - }), - }, - withPropertiesFromList({ - list: '#rawMatches.groups', + list: '#parsedContentEntryHeadings', prefix: input.value('#entries'), properties: input.value([ 'artistReferences', @@ -228,7 +170,7 @@ export default templateCompositeFrom({ '#entries.dateKind', '#entries.accessDate', '#entries.accessKind', - '#entries.body', + '#parsedContentEntryBodies', ], compute: (continuation, { @@ -240,7 +182,7 @@ export default templateCompositeFrom({ ['#entries.dateKind']: dateKind, ['#entries.accessDate']: accessDate, ['#entries.accessKind']: accessKind, - ['#entries.body']: body, + ['#parsedContentEntryBodies']: body, }) => continuation({ ['#parsedCommentaryEntries']: stitchArrays({ diff --git a/src/data/composite/wiki-data/withParsedContentEntries.js b/src/data/composite/wiki-data/withParsedContentEntries.js new file mode 100644 index 00000000..2a9b3f6a --- /dev/null +++ b/src/data/composite/wiki-data/withParsedContentEntries.js @@ -0,0 +1,111 @@ +import {input, templateCompositeFrom} from '#composite'; +import {stitchArrays} from '#sugar'; +import {isContentString, validateInstanceOf} from '#validators'; + +import {withPropertiesFromList} from '#composite/data'; + +export default templateCompositeFrom({ + annotation: `withParsedContentEntries`, + + inputs: { + // TODO: Is there any way to validate this input based on the *other* + // inputs proivded, i.e. regexes? This kind of just assumes the string + // has already been validated according to the form the regex expects, + // which *is* always the case (as used), but it seems a bit awkward. + from: input({validate: isContentString}), + + caseSensitiveRegex: input({ + validate: validateInstanceOf(RegExp), + }), + }, + + outputs: [ + '#parsedContentEntryHeadings', + '#parsedContentEntryBodies', + ], + + steps: () => [ + { + dependencies: [ + input('from'), + input('caseSensitiveRegex'), + ], + + compute: (continuation, { + [input('from')]: commentaryText, + [input('caseSensitiveRegex')]: caseSensitiveRegex, + }) => continuation({ + ['#rawMatches']: + Array.from(commentaryText.matchAll(caseSensitiveRegex)), + }), + }, + + withPropertiesFromList({ + list: '#rawMatches', + properties: input.value([ + '0', // The entire match as a string. + 'groups', + 'index', + ]), + }).outputs({ + '#rawMatches.0': '#rawMatches.text', + '#rawMatches.groups': '#parsedContentEntryHeadings', + '#rawMatches.index': '#rawMatches.startIndex', + }), + + { + dependencies: [ + '#rawMatches.text', + '#rawMatches.startIndex', + ], + + compute: (continuation, { + ['#rawMatches.text']: text, + ['#rawMatches.startIndex']: startIndex, + }) => continuation({ + ['#rawMatches.endIndex']: + stitchArrays({text, startIndex}) + .map(({text, startIndex}) => startIndex + text.length), + }), + }, + + { + dependencies: [ + input('from'), + '#rawMatches.startIndex', + '#rawMatches.endIndex', + ], + + compute: (continuation, { + [input('from')]: commentaryText, + ['#rawMatches.startIndex']: startIndex, + ['#rawMatches.endIndex']: endIndex, + }) => continuation({ + ['#parsedContentEntryBodies']: + stitchArrays({startIndex, endIndex}) + .map(({endIndex}, index, stitched) => + (index === stitched.length - 1 + ? commentaryText.slice(endIndex) + : commentaryText.slice( + endIndex, + stitched[index + 1].startIndex))) + .map(body => body.trim()), + }), + }, + + { + dependencies: [ + '#parsedContentEntryHeadings', + '#parsedContentEntryBodies', + ], + + compute: (continuation, { + ['#parsedContentEntryHeadings']: parsedContentEntryHeadings, + ['#parsedContentEntryBodies']: parsedContentEntryBodies, + }) => continuation({ + ['#parsedContentEntryHeadings']: parsedContentEntryHeadings, + ['#parsedContentEntryBodies']: parsedContentEntryBodies, + }) + } + ], +}); |