From 6deea0629a3f3b9985d205d2f3a048893ea938c9 Mon Sep 17 00:00:00 2001
From: "(quasar) nebula"
Date: Tue, 14 Nov 2023 22:20:45 -0400
Subject: data, test: withParsedCommentaryEntries

---
 .../wiki-data/withParsedCommentaryEntries.js       | 181 +++++++++++++++++++++
 src/data/composite/wiki-properties/commentary.js   |  32 +++-
 .../wiki-data/withParsedCommentaryEntries.js       | 102 ++++++++++++
 3 files changed, 308 insertions(+), 7 deletions(-)
 create mode 100644 src/data/composite/wiki-data/withParsedCommentaryEntries.js
 create mode 100644 test/unit/data/composite/wiki-data/withParsedCommentaryEntries.js

diff --git a/src/data/composite/wiki-data/withParsedCommentaryEntries.js b/src/data/composite/wiki-data/withParsedCommentaryEntries.js
new file mode 100644
index 00000000..5bd72dc9
--- /dev/null
+++ b/src/data/composite/wiki-data/withParsedCommentaryEntries.js
@@ -0,0 +1,181 @@
+import {input, templateCompositeFrom} from '#composite';
+import find from '#find';
+import {stitchArrays} from '#sugar';
+import {isCommentary} from '#validators';
+
+import {fillMissingListItems, withPropertiesFromList} from '#composite/data';
+
+import withResolvedReferenceList from './withResolvedReferenceList.js';
+
+// Matches in roughly the format:
+//
+//   <i>artistReference:</i> (annotation, date)
+//
+// where capturing group "annotation" can be any text at all, except that the
+// last entry (past a comma or the only content within parentheses), if parsed
+// as a date, is the capturing group "date". "Parsing as a date" means one of
+// these formats:
+//
+//   * "25 December 2019" - one or two number digits, followed by any text,
+//     followed by four number digits
+//   * "12/25/2019" - one or two number digits, a slash, one or two number
+//     digits, a slash, and two to four number digits
+//
+// The artist reference can optionally be boldface (in <b>), which will be
+// captured as non-null in "boldfaceArtist". Otherwise it is all the characters
+// between <i> and </i> and is captured in "artistReference" and is either the
+// name of an artist or an "artist:directory"-style reference.
+//
+export const commentaryRegex =
+  /^<i>(?<boldfaceArtist><b>)?(?<artistReference>.+):(?:<\/b>)?<\/i>(?: \((?<annotation>(?:.*?(?=[,)]))*?)(?:,? ?(?<date>[0-9]{1,2} [^,]*[0-9]{4,4}|[0-9]{1,2}\/[0-9]{1,2}\/[0-9]{2,4}))?\))?/gm;
+
+export default templateCompositeFrom({
+  annotation: `withParsedCommentaryEntries`,
+
+  inputs: {
+    from: input({validate: isCommentary}),
+  },
+
+  outputs: ['#parsedCommentaryEntries'],
+
+  steps: () => [
+    {
+      dependencies: [input('from')],
+
+      compute: (continuation, {
+        [input('from')]: commentaryText,
+      }) => continuation({
+        ['#rawMatches']:
+          Array.from(commentaryText.matchAll(commentaryRegex)),
+      }),
+    },
+
+    withPropertiesFromList({
+      list: '#rawMatches',
+      properties: input.value([
+        '0', // The entire match as a string.
+        'groups',
+        'index',
+      ]),
+    }).outputs({
+      '#rawMatches.0': '#rawMatches.text',
+      '#rawMatches.groups': '#rawMatches.groups',
+      '#rawMatches.index': '#rawMatches.startIndex',
+    }),
+
+    {
+      dependencies: [
+        '#rawMatches.text',
+        '#rawMatches.startIndex',
+      ],
+
+      compute: (continuation, {
+        ['#rawMatches.text']: text,
+        ['#rawMatches.startIndex']: startIndex,
+      }) => continuation({
+        ['#rawMatches.endIndex']:
+          stitchArrays({text, startIndex})
+            .map(({text, startIndex}) => startIndex + text.length),
+      }),
+    },
+
+    {
+      dependencies: [
+        input('from'),
+        '#rawMatches.startIndex',
+        '#rawMatches.endIndex',
+      ],
+
+      compute: (continuation, {
+        [input('from')]: commentaryText,
+        ['#rawMatches.startIndex']: startIndex,
+        ['#rawMatches.endIndex']: endIndex,
+      }) => continuation({
+        ['#entries.body']:
+          stitchArrays({startIndex, endIndex})
+            .map(({endIndex}, index, stitched) =>
+              (index === stitched.length - 1
+                ? commentaryText.slice(endIndex)
+                : commentaryText.slice(
+                    endIndex,
+                    stitched[index + 1].startIndex)))
+            .map(body => body.trim()),
+      }),
+    },
+
+    withPropertiesFromList({
+      list: '#rawMatches.groups',
+      prefix: input.value('#entries'),
+      properties: input.value([
+        'artistReference',
+        'boldfaceArtist',
+        'annotation',
+        'date',
+      ]),
+    }),
+
+    // The artistReference group will always have a value, since it's required
+    // for the line to match in the first place.
+
+    withResolvedReferenceList({
+      list: '#entries.artistReference',
+      data: 'artistData',
+      find: input.value(find.artist),
+      notFoundMode: input.value('null'),
+    }).outputs({
+      '#resolvedReferenceList': '#entries.artist',
+    }),
+
+    {
+      dependencies: ['#entries.boldfaceArtist'],
+      compute: (continuation, {
+        ['#entries.boldfaceArtist']: boldfaceArtist,
+      }) => continuation({
+        ['#entries.boldfaceArtist']:
+          boldfaceArtist.map(boldface => boldface ? true : false),
+      }),
+    },
+
+    fillMissingListItems({
+      list: '#entries.annotation',
+      fill: input.value(null),
+    }),
+
+    {
+      dependencies: ['#entries.date'],
+      compute: (continuation, {
+        ['#entries.date']: date,
+      }) => continuation({
+        ['#entries.date']:
+          date.map(date => date ? new Date(date) : null),
+      }),
+    },
+
+    {
+      dependencies: [
+        '#entries.artist',
+        '#entries.boldfaceArtist',
+        '#entries.annotation',
+        '#entries.date',
+        '#entries.body',
+      ],
+
+      compute: (continuation, {
+        ['#entries.artist']: artist,
+        ['#entries.boldfaceArtist']: boldfaceArtist,
+        ['#entries.annotation']: annotation,
+        ['#entries.date']: date,
+        ['#entries.body']: body,
+      }) => continuation({
+        ['#parsedCommentaryEntries']:
+          stitchArrays({
+            artist,
+            boldfaceArtist,
+            annotation,
+            date,
+            body,
+          }),
+      }),
+    },
+  ],
+});
diff --git a/src/data/composite/wiki-properties/commentary.js b/src/data/composite/wiki-properties/commentary.js
index fbea9d5c..cd6b7ac4 100644
--- a/src/data/composite/wiki-properties/commentary.js
+++ b/src/data/composite/wiki-properties/commentary.js
@@ -1,12 +1,30 @@
 // Artist commentary! Generally present on tracks and albums.
 
+import {input, templateCompositeFrom} from '#composite';
 import {isCommentary} from '#validators';
 
-// TODO: Not templateCompositeFrom.
+import {exitWithoutDependency, exposeDependency}
+  from '#composite/control-flow';
+import {withParsedCommentaryEntries} from '#composite/wiki-data';
 
-export default function() {
-  return {
-    flags: {update: true, expose: true},
-    update: {validate: isCommentary},
-  };
-}
+export default templateCompositeFrom({
+  annotation: `commentary`,
+
+  compose: false,
+
+  steps: () => [
+    exitWithoutDependency({
+      dependency: input.updateValue({validate: isCommentary}),
+      mode: input.value('falsy'),
+      value: input.value(null),
+    }),
+
+    withParsedCommentaryEntries({
+      from: input.updateValue(),
+    }),
+
+    exposeDependency({
+      dependency: '#parsedCommentaryEntries',
+    }),
+  ],
+});
diff --git a/test/unit/data/composite/wiki-data/withParsedCommentaryEntries.js b/test/unit/data/composite/wiki-data/withParsedCommentaryEntries.js
new file mode 100644
index 00000000..7b456449
--- /dev/null
+++ b/test/unit/data/composite/wiki-data/withParsedCommentaryEntries.js
@@ -0,0 +1,102 @@
+import t from 'tap';
+
+import {compositeFrom, input} from '#composite';
+import thingConstructors from '#things';
+
+import {exposeDependency} from '#composite/control-flow';
+import {withParsedCommentaryEntries} from '#composite/wiki-data';
+
+const {Artist} = thingConstructors;
+
+const composite = compositeFrom({
+  compose: false,
+
+  steps: [
+    withParsedCommentaryEntries({
+      from: 'from',
+    }),
+
+    exposeDependency({dependency: '#parsedCommentaryEntries'}),
+  ],
+});
+
+function stubArtist(artistName = `Test Artist`) {
+  const artist = new Artist();
+  artist.name = artistName;
+
+  return artist;
+}
+
+t.test(`withParsedCommentaryEntries: basic behavior`, t => {
+  t.plan(3);
+
+  const artist1 = stubArtist(`Mobius Trip`);
+  const artist2 = stubArtist(`Hadron Kaleido`);
+
+  const artistData = [artist1, artist2];
+
+  t.match(composite, {
+    expose: {
+      dependencies: ['from', 'artistData'],
+    },
+  });
+
+  t.match(composite.expose.compute({
+    artistData,
+    from:
+      `<i>Mobius Trip:</i>\n` +
+      `Some commentary.\n` +
+      `Very cool.\n`,
+  }), [
+    {
+      artist: artist1,
+      boldfaceArtist: false,
+      annotation: null,
+      date: null,
+      body: `Some commentary.\nVery cool.`,
+    },
+  ]);
+
+  t.match(composite.expose.compute({
+    artistData,
+    from:
+      `<i>Mobius Trip:</i> (music, art, 12 January 2015)\n` +
+      `First commentary entry.\n` +
+      `Very cool.\n` +
+      `<i><b>Hadron Kaleido:</b></i> (moral support, 4/4/2022)\n` +
+      `Second commentary entry. Yes. So cool.\n` +
+      `<i>Mystery Artist:</i> (pingas)\n` +
+      `Oh no.. Oh dear...\n` +
+      `<i>Mobius Trip:</i>\n` +
+      `And back around we go.`,
+  }), [
+    {
+      artist: artist1,
+      boldfaceArtist: false,
+      annotation: `music, art`,
+      date: new Date('12 January 2015'),
+      body: `First commentary entry.\nVery cool.`,
+    },
+    {
+      artist: artist2,
+      boldfaceArtist: true,
+      annotation: `moral support`,
+      date: new Date('4 April 2022'),
+      body: `Second commentary entry. Yes. So cool.`,
+    },
+    {
+      artist: null,
+      boldfaceArtist: false,
+      annotation: `pingas`,
+      date: null,
+      body: `Oh no.. Oh dear...`,
+    },
+    {
+      artist: artist1,
+      boldfaceArtist: false,
+      annotation: null,
+      date: null,
+      body: `And back around we go.`,
+    },
+  ]);
+});
-- 
cgit 1.3.0-6-gf8a5