diff options
Diffstat (limited to 'src/data/composite/wiki-data')
| -rw-r--r-- | src/data/composite/wiki-data/index.js | 3 | ||||
| -rw-r--r-- | src/data/composite/wiki-data/processContentEntryDates.js | 181 | ||||
| -rw-r--r-- | src/data/composite/wiki-data/withParsedContentEntries.js | 111 | ||||
| -rw-r--r-- | src/data/composite/wiki-data/withParsedLyricsEntries.js | 157 | 
4 files changed, 0 insertions, 452 deletions
| diff --git a/src/data/composite/wiki-data/index.js b/src/data/composite/wiki-data/index.js index d2b82a2e..005c68c0 100644 --- a/src/data/composite/wiki-data/index.js +++ b/src/data/composite/wiki-data/index.js @@ -11,14 +11,11 @@ export {default as inputNotFoundMode} from './inputNotFoundMode.js'; export {default as inputSoupyFind} from './inputSoupyFind.js'; export {default as inputSoupyReverse} from './inputSoupyReverse.js'; export {default as inputWikiData} from './inputWikiData.js'; -export {default as processContentEntryDates} from './processContentEntryDates.js'; export {default as withClonedThings} from './withClonedThings.js'; export {default as withConstitutedArtwork} from './withConstitutedArtwork.js'; export {default as withContributionListSums} from './withContributionListSums.js'; export {default as withCoverArtDate} from './withCoverArtDate.js'; export {default as withDirectory} from './withDirectory.js'; -export {default as withParsedContentEntries} from './withParsedContentEntries.js'; -export {default as withParsedLyricsEntries} from './withParsedLyricsEntries.js'; export {default as withRecontextualizedContributionList} from './withRecontextualizedContributionList.js'; export {default as withRedatedContributionList} from './withRedatedContributionList.js'; export {default as withResolvedAnnotatedReferenceList} from './withResolvedAnnotatedReferenceList.js'; diff --git a/src/data/composite/wiki-data/processContentEntryDates.js b/src/data/composite/wiki-data/processContentEntryDates.js deleted file mode 100644 index e418a121..00000000 --- a/src/data/composite/wiki-data/processContentEntryDates.js +++ /dev/null @@ -1,181 +0,0 @@ -import {input, templateCompositeFrom} from '#composite'; -import {stitchArrays} from '#sugar'; -import {isContentString, isString, looseArrayOf} from '#validators'; - -import {fillMissingListItems} from '#composite/data'; - -// Important note: These two kinds of inputs have the exact same shape!! -// This isn't on purpose (besides that they *are* both supposed to be strings). -// They just don't have any more particular validation, yet. - -const inputDateList = defaultDependency => - input({ - validate: looseArrayOf(isString), - defaultDependency, - }); - -const inputKindList = defaultDependency => - input.staticDependency({ - validate: looseArrayOf(isString), - defaultDependency: defaultDependency, - }); - -export default templateCompositeFrom({ - annotation: `processContentEntryDates`, - - inputs: { - annotations: input({ - validate: looseArrayOf(isContentString), - defaultDependency: '#entries.annotation', - }), - - dates: inputDateList('#entries.date'), - secondDates: inputDateList('#entries.secondDate'), - accessDates: inputDateList('#entries.accessDate'), - - dateKinds: inputKindList('#entries.dateKind'), - accessKinds: inputKindList('#entries.accessKind'), - }, - - outputs: ({ - [input.staticDependency('dates')]: dates, - [input.staticDependency('secondDates')]: secondDates, - [input.staticDependency('accessDates')]: accessDates, - [input.staticDependency('dateKinds')]: dateKinds, - [input.staticDependency('accessKinds')]: accessKinds, - }) => [ - dates ?? '#processedContentEntryDates', - secondDates ?? '#processedContentEntrySecondDates', - accessDates ?? '#processedContentEntryAccessDates', - dateKinds ?? '#processedContentEntryDateKinds', - accessKinds ?? '#processedContentEntryAccessKinds', - ], - - steps: () => [ - { - dependencies: [input('annotations')], - compute: (continuation, { - [input('annotations')]: annotations, - }) => continuation({ - ['#webArchiveDates']: - annotations - .map(text => text?.match(/https?:\/\/web.archive.org\/web\/([0-9]{8,8})[0-9]*\//)) - .map(match => match?.[1]) - .map(dateText => - (dateText - ? dateText.slice(0, 4) + '/' + - dateText.slice(4, 6) + '/' + - dateText.slice(6, 8) - : null)), - }), - }, - - { - dependencies: [input('dates')], - compute: (continuation, { - [input('dates')]: dates, - }) => continuation({ - ['#processedContentEntryDates']: - dates - .map(date => date ? new Date(date) : null), - }), - }, - - { - dependencies: [input('secondDates')], - compute: (continuation, { - [input('secondDates')]: secondDates, - }) => continuation({ - ['#processedContentEntrySecondDates']: - secondDates - .map(date => date ? new Date(date) : null), - }), - }, - - fillMissingListItems({ - list: input('dateKinds'), - fill: input.value(null), - }).outputs({ - '#list': '#processedContentEntryDateKinds', - }), - - { - dependencies: [input('accessDates'), '#webArchiveDates'], - compute: (continuation, { - [input('accessDates')]: accessDates, - ['#webArchiveDates']: webArchiveDates, - }) => continuation({ - ['#processedContentEntryAccessDates']: - stitchArrays({ - accessDate: accessDates, - webArchiveDate: webArchiveDates - }).map(({accessDate, webArchiveDate}) => - accessDate ?? - webArchiveDate ?? - null) - .map(date => date ? new Date(date) : date), - }), - }, - - { - dependencies: [input('accessKinds'), '#webArchiveDates'], - compute: (continuation, { - [input('accessKinds')]: accessKinds, - ['#webArchiveDates']: webArchiveDates, - }) => continuation({ - ['#processedContentEntryAccessKinds']: - stitchArrays({ - accessKind: accessKinds, - webArchiveDate: webArchiveDates, - }).map(({accessKind, webArchiveDate}) => - accessKind ?? - (webArchiveDate && 'captured') ?? - null), - }), - }, - - // TODO: Annoying conversion step for outputs, would be nice to avoid. - { - dependencies: [ - '#processedContentEntryDates', - '#processedContentEntrySecondDates', - '#processedContentEntryAccessDates', - '#processedContentEntryDateKinds', - '#processedContentEntryAccessKinds', - input.staticDependency('dates'), - input.staticDependency('secondDates'), - input.staticDependency('accessDates'), - input.staticDependency('dateKinds'), - input.staticDependency('accessKinds'), - ], - - compute: (continuation, { - ['#processedContentEntryDates']: processedContentEntryDates, - ['#processedContentEntrySecondDates']: processedContentEntrySecondDates, - ['#processedContentEntryAccessDates']: processedContentEntryAccessDates, - ['#processedContentEntryDateKinds']: processedContentEntryDateKinds, - ['#processedContentEntryAccessKinds']: processedContentEntryAccessKinds, - [input.staticDependency('dates')]: dates, - [input.staticDependency('secondDates')]: secondDates, - [input.staticDependency('accessDates')]: accessDates, - [input.staticDependency('dateKinds')]: dateKinds, - [input.staticDependency('accessKinds')]: accessKinds, - }) => continuation({ - [dates ?? '#processedContentEntryDates']: - processedContentEntryDates, - - [secondDates ?? '#processedContentEntrySecondDates']: - processedContentEntrySecondDates, - - [accessDates ?? '#processedContentEntryAccessDates']: - processedContentEntryAccessDates, - - [dateKinds ?? '#processedContentEntryDateKinds']: - processedContentEntryDateKinds, - - [accessKinds ?? '#processedContentEntryAccessKinds']: - processedContentEntryAccessKinds, - }), - }, - ], -}); diff --git a/src/data/composite/wiki-data/withParsedContentEntries.js b/src/data/composite/wiki-data/withParsedContentEntries.js deleted file mode 100644 index 2a9b3f6a..00000000 --- a/src/data/composite/wiki-data/withParsedContentEntries.js +++ /dev/null @@ -1,111 +0,0 @@ -import {input, templateCompositeFrom} from '#composite'; -import {stitchArrays} from '#sugar'; -import {isContentString, validateInstanceOf} from '#validators'; - -import {withPropertiesFromList} from '#composite/data'; - -export default templateCompositeFrom({ - annotation: `withParsedContentEntries`, - - inputs: { - // TODO: Is there any way to validate this input based on the *other* - // inputs proivded, i.e. regexes? This kind of just assumes the string - // has already been validated according to the form the regex expects, - // which *is* always the case (as used), but it seems a bit awkward. - from: input({validate: isContentString}), - - caseSensitiveRegex: input({ - validate: validateInstanceOf(RegExp), - }), - }, - - outputs: [ - '#parsedContentEntryHeadings', - '#parsedContentEntryBodies', - ], - - steps: () => [ - { - dependencies: [ - input('from'), - input('caseSensitiveRegex'), - ], - - compute: (continuation, { - [input('from')]: commentaryText, - [input('caseSensitiveRegex')]: caseSensitiveRegex, - }) => continuation({ - ['#rawMatches']: - Array.from(commentaryText.matchAll(caseSensitiveRegex)), - }), - }, - - withPropertiesFromList({ - list: '#rawMatches', - properties: input.value([ - '0', // The entire match as a string. - 'groups', - 'index', - ]), - }).outputs({ - '#rawMatches.0': '#rawMatches.text', - '#rawMatches.groups': '#parsedContentEntryHeadings', - '#rawMatches.index': '#rawMatches.startIndex', - }), - - { - dependencies: [ - '#rawMatches.text', - '#rawMatches.startIndex', - ], - - compute: (continuation, { - ['#rawMatches.text']: text, - ['#rawMatches.startIndex']: startIndex, - }) => continuation({ - ['#rawMatches.endIndex']: - stitchArrays({text, startIndex}) - .map(({text, startIndex}) => startIndex + text.length), - }), - }, - - { - dependencies: [ - input('from'), - '#rawMatches.startIndex', - '#rawMatches.endIndex', - ], - - compute: (continuation, { - [input('from')]: commentaryText, - ['#rawMatches.startIndex']: startIndex, - ['#rawMatches.endIndex']: endIndex, - }) => continuation({ - ['#parsedContentEntryBodies']: - stitchArrays({startIndex, endIndex}) - .map(({endIndex}, index, stitched) => - (index === stitched.length - 1 - ? commentaryText.slice(endIndex) - : commentaryText.slice( - endIndex, - stitched[index + 1].startIndex))) - .map(body => body.trim()), - }), - }, - - { - dependencies: [ - '#parsedContentEntryHeadings', - '#parsedContentEntryBodies', - ], - - compute: (continuation, { - ['#parsedContentEntryHeadings']: parsedContentEntryHeadings, - ['#parsedContentEntryBodies']: parsedContentEntryBodies, - }) => continuation({ - ['#parsedContentEntryHeadings']: parsedContentEntryHeadings, - ['#parsedContentEntryBodies']: parsedContentEntryBodies, - }) - } - ], -}); diff --git a/src/data/composite/wiki-data/withParsedLyricsEntries.js b/src/data/composite/wiki-data/withParsedLyricsEntries.js deleted file mode 100644 index 419ceb84..00000000 --- a/src/data/composite/wiki-data/withParsedLyricsEntries.js +++ /dev/null @@ -1,157 +0,0 @@ -import {input, templateCompositeFrom} from '#composite'; -import {stitchArrays} from '#sugar'; -import {isLyrics} from '#validators'; -import {commentaryRegexCaseSensitive, multipleLyricsDetectionRegex} - from '#wiki-data'; - -import { - fillMissingListItems, - withFlattenedList, - withPropertiesFromList, - withUnflattenedList, -} from '#composite/data'; - -import inputSoupyFind from './inputSoupyFind.js'; -import processContentEntryDates from './processContentEntryDates.js'; -import withParsedContentEntries from './withParsedContentEntries.js'; -import withResolvedReferenceList from './withResolvedReferenceList.js'; - -function constituteLyricsEntry(text) { - return { - artists: [], - artistDisplayText: null, - annotation: null, - date: null, - secondDate: null, - dateKind: null, - accessDate: null, - accessKind: null, - body: text, - }; -} - -export default templateCompositeFrom({ - annotation: `withParsedLyricsEntries`, - - inputs: { - from: input({validate: isLyrics}), - }, - - outputs: ['#parsedLyricsEntries'], - - steps: () => [ - { - dependencies: [input('from')], - compute: (continuation, { - [input('from')]: lyrics, - }) => - (multipleLyricsDetectionRegex.test(lyrics) - ? continuation() - : continuation.raiseOutput({ - ['#parsedLyricsEntries']: - [constituteLyricsEntry(lyrics)], - })), - }, - - withParsedContentEntries({ - from: input('from'), - caseSensitiveRegex: input.value(commentaryRegexCaseSensitive), - }), - - withPropertiesFromList({ - list: '#parsedContentEntryHeadings', - prefix: input.value('#entries'), - properties: input.value([ - 'artistReferences', - 'artistDisplayText', - 'annotation', - 'date', - 'secondDate', - 'dateKind', - 'accessDate', - 'accessKind', - ]), - }), - - // The artistReferences group will always have a value, since it's required - // for the line to match in the first place. - - { - dependencies: ['#entries.artistReferences'], - compute: (continuation, { - ['#entries.artistReferences']: artistReferenceTexts, - }) => continuation({ - ['#entries.artistReferences']: - artistReferenceTexts - .map(text => text.split(',').map(ref => ref.trim())), - }), - }, - - withFlattenedList({ - list: '#entries.artistReferences', - }), - - withResolvedReferenceList({ - list: '#flattenedList', - find: inputSoupyFind.input('artist'), - notFoundMode: input.value('null'), - }), - - withUnflattenedList({ - list: '#resolvedReferenceList', - }).outputs({ - '#unflattenedList': '#entries.artists', - }), - - fillMissingListItems({ - list: '#entries.artistDisplayText', - fill: input.value(null), - }), - - fillMissingListItems({ - list: '#entries.annotation', - fill: input.value(null), - }), - - processContentEntryDates(), - - { - dependencies: [ - '#entries.artists', - '#entries.artistDisplayText', - '#entries.annotation', - '#entries.date', - '#entries.secondDate', - '#entries.dateKind', - '#entries.accessDate', - '#entries.accessKind', - '#parsedContentEntryBodies', - ], - - compute: (continuation, { - ['#entries.artists']: artists, - ['#entries.artistDisplayText']: artistDisplayText, - ['#entries.annotation']: annotation, - ['#entries.date']: date, - ['#entries.secondDate']: secondDate, - ['#entries.dateKind']: dateKind, - ['#entries.accessDate']: accessDate, - ['#entries.accessKind']: accessKind, - ['#parsedContentEntryBodies']: body, - }) => continuation({ - ['#parsedLyricsEntries']: - stitchArrays({ - artists, - artistDisplayText, - annotation, - date, - secondDate, - dateKind, - accessDate, - accessKind, - body, - }), - }), - }, - ], -}); | 
