From 362dc0619b93d74ad34df1bfbfd9ebc632fa5156 Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Tue, 14 Nov 2023 22:49:51 -0400 Subject: data, yaml: catch commentary artist ref errors --- .../wiki-data/withParsedCommentaryEntries.js | 23 +-------- src/data/yaml.js | 55 ++++++++++++++++------ src/util/wiki-data.js | 25 ++++++++++ 3 files changed, 67 insertions(+), 36 deletions(-) (limited to 'src') diff --git a/src/data/composite/wiki-data/withParsedCommentaryEntries.js b/src/data/composite/wiki-data/withParsedCommentaryEntries.js index 5bd72dc..9e33cda 100644 --- a/src/data/composite/wiki-data/withParsedCommentaryEntries.js +++ b/src/data/composite/wiki-data/withParsedCommentaryEntries.js @@ -2,33 +2,12 @@ import {input, templateCompositeFrom} from '#composite'; import find from '#find'; import {stitchArrays} from '#sugar'; import {isCommentary} from '#validators'; +import {commentaryRegex} from '#wiki-data'; import {fillMissingListItems, withPropertiesFromList} from '#composite/data'; import withResolvedReferenceList from './withResolvedReferenceList.js'; -// Matches in roughly the format: -// -// <i>artistReference:</i> (annotation, date) -// -// where capturing group "annotation" can be any text at all, except that the -// last entry (past a comma or the only content within parentheses), if parsed -// as a date, is the capturing group "date". "Parsing as a date" means one of -// these formats: -// -// * "25 December 2019" - one or two number digits, followed by any text, -// followed by four number digits -// * "12/25/2019" - one or two number digits, a slash, one or two number -// digits, a slash, and two to four number digits -// -// The artist reference can optionally be boldface (in <b>), which will be -// captured as non-null in "boldfaceArtist". Otherwise it is all the characters -// between <i> and </i> and is captured in "artistReference" and is either the -// name of an artist or an "artist:directory"-style reference. 
-// -export const commentaryRegex = - /^<i>(?<boldfaceArtist><b>)?(?<artistReference>.+):(?:<\/b>)?<\/i>(?: \((?<annotation>(?:.*?(?=[,)]))*?)(?:,? ?(?<date>[0-9]{1,2} [^,]*[0-9]{4,4}|[0-9]{1,2}\/[0-9]{1,2}\/[0-9]{2,4}))?\))?/gm; - export default templateCompositeFrom({ annotation: `withParsedCommentaryEntries`, diff --git a/src/data/yaml.js b/src/data/yaml.js index 986f25d..843e70b 100644 --- a/src/data/yaml.js +++ b/src/data/yaml.js @@ -28,6 +28,7 @@ import { } from '#sugar'; import { + commentaryRegex, sortAlbumsTracksChronologically, sortAlphabetically, sortChronologically, @@ -1616,6 +1617,7 @@ export function filterReferenceErrors(wikiData) { bannerArtistContribs: '_contrib', groups: 'group', artTags: 'artTag', + commentary: '_commentary', }], ['trackData', processTrackDocument, { @@ -1626,6 +1628,7 @@ export function filterReferenceErrors(wikiData) { sampledTracks: '_trackNotRerelease', artTags: 'artTag', originalReleaseTrack: '_trackNotRerelease', + commentary: '_commentary', }], ['groupCategoryData', processGroupCategoryDocument, { @@ -1675,7 +1678,19 @@ export function filterReferenceErrors(wikiData) { nest({message: `Reference errors in ${inspect(thing)}`}, ({nest, push, filter}) => { for (const [property, findFnKey] of Object.entries(propSpec)) { - const value = CacheableObject.getUpdateValue(thing, property); + let value = CacheableObject.getUpdateValue(thing, property); + let writeProperty = true; + + switch (findFnKey) { + case '_commentary': + if (value) { + value = + Array.from(value.matchAll(commentaryRegex)) + .map(({groups}) => groups.artistReference); + } + writeProperty = false; + break; + } if (value === undefined) { push(new TypeError(`Property ${colors.red(property)} isn't valid for ${colors.green(thing.constructor.name)}`)); @@ -1688,19 +1703,25 @@ export function filterReferenceErrors(wikiData) { let findFn; + const findArtistOrAlias = artistRef => { + const alias = find.artist(artistRef, wikiData.artistAliasData, {mode: 'quiet'}); + if (alias) { + // No need to check if the original exists 
here. Aliases are automatically + // created from a field on the original, so the original certainly exists. + const original = alias.aliasedArtist; + throw new Error(`Reference ${colors.red(artistRef)} is to an alias, should be ${colors.green(original.name)}`); + } + + return boundFind.artist(artistRef); + }; + switch (findFnKey) { - case '_contrib': - findFn = contribRef => { - const alias = find.artist(contribRef.who, wikiData.artistAliasData, {mode: 'quiet'}); - if (alias) { - // No need to check if the original exists here. Aliases are automatically - // created from a field on the original, so the original certainly exists. - const original = alias.aliasedArtist; - throw new Error(`Reference ${colors.red(contribRef.who)} is to an alias, should be ${colors.green(original.name)}`); - } + case '_commentary': + findFn = findArtistOrAlias; + break; - return boundFind.artist(contribRef.who); - }; + case '_contrib': + findFn = contribRef => findArtistOrAlias(contribRef.who); break; case '_homepageSourceGroup': @@ -1781,8 +1802,10 @@ export function filterReferenceErrors(wikiData) { ? 
`Reference errors` + fieldPropertyMessage + findFnMessage : `Reference error` + fieldPropertyMessage + findFnMessage); + let newPropertyValue = value; + if (Array.isArray(value)) { - thing[property] = filter( + newPropertyValue = filter( value, decorateErrorWithIndex(suppress(findFn)), {message: errorMessage}); @@ -1792,11 +1815,15 @@ export function filterReferenceErrors(wikiData) { try { call(findFn, value); } catch (error) { - thing[property] = null; + newPropertyValue = null; throw error; } })); } + + if (writeProperty) { + thing[property] = newPropertyValue; + } } }); } diff --git a/src/util/wiki-data.js b/src/util/wiki-data.js index 0790ae9..5ab0122 100644 --- a/src/util/wiki-data.js +++ b/src/util/wiki-data.js @@ -629,6 +629,31 @@ export function sortFlashesChronologically(data, { // Specific data utilities +// Matches heading details from commentary data in roughly the format: +// +// <i>artistReference:</i> (annotation, date) +// +// where capturing group "annotation" can be any text at all, except that the +// last entry (past a comma or the only content within parentheses), if parsed +// as a date, is the capturing group "date". "Parsing as a date" means one of +// these formats: +// +// * "25 December 2019" - one or two number digits, followed by any text, +// followed by four number digits +// * "12/25/2019" - one or two number digits, a slash, one or two number +// digits, a slash, and two to four number digits +// +// The artist reference can optionally be boldface (in <b>), which will be +// captured as non-null in "boldfaceArtist". Otherwise it is all the characters +// between <i> and </i> and is captured in "artistReference" and is either the +// name of an artist or an "artist:directory"-style reference. +// +// This regular expression *doesn't* match bodies, which will need to be parsed +// out of the original string based on the indices matched using this. +// +export const commentaryRegex = + /^<i>(?<boldfaceArtist><b>)?(?<artistReference>.+):(?:<\/b>)?<\/i>(?: \((?<annotation>(?:.*?(?=[,)]))*?)(?:,? 
?(?<date>[0-9]{1,2} [^,]*[0-9]{4,4}|[0-9]{1,2}\/[0-9]{1,2}\/[0-9]{2,4}))?\))?/gm; + export function filterAlbumsByCommentary(albums) { return albums .filter((album) => [album, ...album.tracks].some((x) => x.commentary)); -- cgit 1.3.0-6-gf8a5