diff options
Diffstat (limited to 'src/data')
| -rw-r--r-- | src/data/checks.js | 415 | ||||
| -rw-r--r-- | src/data/yaml.js | 406 | 
2 files changed, 424 insertions, 397 deletions
| diff --git a/src/data/checks.js b/src/data/checks.js new file mode 100644 index 00000000..25e94aa9 --- /dev/null +++ b/src/data/checks.js @@ -0,0 +1,415 @@ +// checks.js - general validation and error/warning reporting for data objects + +import {inspect as nodeInspect} from 'node:util'; +import {colors, ENABLE_COLOR} from '#cli'; + +import CacheableObject from '#cacheable-object'; +import Thing from '#thing'; +import thingConstructors from '#things'; +import {commentaryRegexCaseSensitive} from '#wiki-data'; + +import { + compareArrays, + conditionallySuppressError, + decorateErrorWithIndex, + empty, + filterAggregate, + openAggregate, + withAggregate, +} from '#sugar'; + +function inspect(value, opts = {}) { + return nodeInspect(value, {colors: ENABLE_COLOR, ...opts}); +} + +// Warn about directories which are reused across more than one of the same type +// of Thing. Directories are the unique identifier for most data objects across +// the wiki, so we have to make sure they aren't duplicated! +export function reportDuplicateDirectories(wikiData, { + getAllFindSpecs, +}) { + const duplicateSets = []; + + for (const findSpec of Object.values(getAllFindSpecs())) { + if (!findSpec.bindTo) continue; + + const directoryPlaces = Object.create(null); + const duplicateDirectories = new Set(); + const thingData = wikiData[findSpec.bindTo]; + + for (const thing of thingData) { + if (findSpec.include && !findSpec.include(thing)) { + continue; + } + + const directories = + (findSpec.getMatchableDirectories + ? findSpec.getMatchableDirectories(thing) + : [thing.directory]); + + for (const directory of directories) { + if (directory in directoryPlaces) { + directoryPlaces[directory].push(thing); + duplicateDirectories.add(directory); + } else { + directoryPlaces[directory] = [thing]; + } + } + } + + if (empty(duplicateDirectories)) continue; + + const sortedDuplicateDirectories = + Array.from(duplicateDirectories) + .sort((a, b) => { + const aL = a.toLowerCase(); + const bL = b.toLowerCase(); + return aL < bL ? -1 : aL > bL ? 1 : 0; + }); + + for (const directory of sortedDuplicateDirectories) { + const places = directoryPlaces[directory]; + duplicateSets.push({directory, places}); + } + } + + if (empty(duplicateSets)) return; + + // Multiple find functions may effectively have duplicates across the same + // things. These only need to be reported once, because resolving one of them + // will resolve the rest, so cut out duplicate sets before reporting. + + const seenDuplicateSets = new Map(); + const deduplicateDuplicateSets = []; + + for (const set of duplicateSets) { + if (seenDuplicateSets.has(set.directory)) { + const placeLists = seenDuplicateSets.get(set.directory); + + for (const places of placeLists) { + // We're iterating globally over all duplicate directories, which may + // span multiple kinds of things, but that isn't going to cause an + // issue because we're comparing the contents by identity, anyway. + // Two artists named Foodog aren't going to match two tracks named + // Foodog. + if (compareArrays(places, set.places, {checkOrder: false})) { + continue; + } + } + + placeLists.push(set.places); + } else { + seenDuplicateSets.set(set.directory, [set.places]); + } + + deduplicateDuplicateSets.push(set); + } + + withAggregate({message: `Duplicate directories found`}, ({push}) => { + for (const {directory, places} of deduplicateDuplicateSets) { + push(new Error( + `Duplicate directory ${colors.green(`"${directory}"`)}:\n` + + places.map(thing => ` - ` + inspect(thing)).join('\n'))); + } + }); +} + +// Warn about references across data which don't match anything. This involves +// using the find() functions on all references, setting it to 'error' mode, and +// collecting everything in a structured logged (which gets logged if there are +// any errors). At the same time, we remove errored references from the thing's +// data array. +export function filterReferenceErrors(wikiData, { + bindFind, +}) { + const referenceSpec = [ + ['albumData', { + artistContribs: '_contrib', + coverArtistContribs: '_contrib', + trackCoverArtistContribs: '_contrib', + wallpaperArtistContribs: '_contrib', + bannerArtistContribs: '_contrib', + groups: 'group', + artTags: '_artTag', + commentary: '_commentary', + }], + + ['groupCategoryData', { + groups: 'group', + }], + + ['homepageLayout.rows', { + sourceGroup: '_homepageSourceGroup', + sourceAlbums: 'album', + }], + + ['flashData', { + contributorContribs: '_contrib', + featuredTracks: 'track', + }], + + ['flashActData', { + flashes: 'flash', + }], + + ['trackData', { + artistContribs: '_contrib', + contributorContribs: '_contrib', + coverArtistContribs: '_contrib', + referencedTracks: '_trackNotRerelease', + sampledTracks: '_trackNotRerelease', + artTags: '_artTag', + originalReleaseTrack: '_trackNotRerelease', + commentary: '_commentary', + }], + + ['wikiInfo', { + divideTrackListsByGroups: 'group', + }], + ]; + + function getNestedProp(obj, key) { + const recursive = (o, k) => + k.length === 1 ? o[k[0]] : recursive(o[k[0]], k.slice(1)); + const keys = key.split(/(?<=(?<!\\)(?:\\\\)*)\./); + return recursive(obj, keys); + } + + const boundFind = bindFind(wikiData, {mode: 'error'}); + + const findArtistOrAlias = artistRef => { + const alias = boundFind.artistAlias(artistRef, {mode: 'quiet'}); + if (alias) { + // No need to check if the original exists here. Aliases are automatically + // created from a field on the original, so the original certainly exists. + const original = alias.aliasedArtist; + throw new Error(`Reference ${colors.red(artistRef)} is to an alias, should be ${colors.green(original.name)}`); + } + + return boundFind.artist(artistRef); + }; + + const aggregate = openAggregate({message: `Errors validating between-thing references in data`}); + for (const [thingDataProp, propSpec] of referenceSpec) { + const thingData = getNestedProp(wikiData, thingDataProp); + + aggregate.nest({message: `Reference errors in ${colors.green('wikiData.' + thingDataProp)}`}, ({nest}) => { + const things = Array.isArray(thingData) ? thingData : [thingData]; + + for (const thing of things) { + nest({message: `Reference errors in ${inspect(thing)}`}, ({nest, push, filter}) => { + for (const [property, findFnKey] of Object.entries(propSpec)) { + let value = CacheableObject.getUpdateValue(thing, property); + let writeProperty = true; + + switch (findFnKey) { + case '_commentary': + if (value) { + value = + Array.from(value.matchAll(commentaryRegexCaseSensitive)) + .map(({groups}) => groups.artistReferences) + .map(text => text.split(',').map(text => text.trim())); + } + + writeProperty = false; + break; + + case '_contrib': + // Don't write out contributions - these'll be filtered out + // for content and data purposes automatically, and they're + // handy to keep around when update values get checked for + // art tags below. (Possibly no reference-related properties + // need writing, humm...) + writeProperty = false; + break; + } + + if (value === undefined) { + push(new TypeError(`Property ${colors.red(property)} isn't valid for ${colors.green(thing.constructor.name)}`)); + continue; + } + + if (value === null) { + continue; + } + + let findFn; + + switch (findFnKey) { + case '_artTag': + findFn = boundFind.artTag; + break; + + case '_commentary': + findFn = findArtistOrAlias; + break; + + case '_contrib': + findFn = contribRef => findArtistOrAlias(contribRef.who); + break; + + case '_homepageSourceGroup': + findFn = groupRef => { + if (groupRef === 'new-additions' || groupRef === 'new-releases') { + return true; + } + + return boundFind.group(groupRef); + }; + break; + + case '_trackNotRerelease': + findFn = trackRef => { + const track = boundFind.track(trackRef); + const originalRef = track && CacheableObject.getUpdateValue(track, 'originalReleaseTrack'); + + if (originalRef) { + // It's possible for the original to not actually exist, in this case. + // It should still be reported since the 'Originally Released As' field + // was present. + const original = boundFind.track(originalRef, {mode: 'quiet'}); + + // Prefer references by name, but only if it's unambiguous. + const originalByName = + (original + ? boundFind.track(original.name, {mode: 'quiet'}) + : null); + + const shouldBeMessage = + (originalByName + ? colors.green(original.name) + : original + ? colors.green('track:' + original.directory) + : colors.green(originalRef)); + + throw new Error(`Reference ${colors.red(trackRef)} is to a rerelease, should be ${shouldBeMessage}`); + } + + return track; + }; + break; + + default: + findFn = boundFind[findFnKey]; + break; + } + + const suppress = fn => conditionallySuppressError(error => { + if (property === 'sampledTracks') { + // Suppress "didn't match anything" errors in particular, just for samples. + // In hsmusic-data we have a lot of "stub" sample data which don't have + // corresponding tracks yet, so it won't be useful to report such reference + // errors until we take the time to address that. But other errors, like + // malformed reference strings or miscapitalized existing tracks, should + // still be reported, as samples of existing tracks *do* display on the + // website! + if (error.message.includes(`Didn't match anything`)) { + return true; + } + } + + return false; + }, fn); + + const {fields} = thing.constructor[Thing.yamlDocumentSpec]; + + const field = + Object.entries(fields ?? {}) + .find(([field, fieldSpec]) => fieldSpec.property === property) + ?.[0]; + + const fieldPropertyMessage = + (field + ? ` in field ${colors.green(field)}` + : ` in property ${colors.green(property)}`); + + const findFnMessage = + (findFnKey.startsWith('_') + ? `` + : ` (${colors.green('find.' + findFnKey)})`); + + const errorMessage = + (Array.isArray(value) + ? `Reference errors` + fieldPropertyMessage + findFnMessage + : `Reference error` + fieldPropertyMessage + findFnMessage); + + let newPropertyValue = value; + + determineNewPropertyValue: { + // TODO: The special-casing for artTag is obviously a bit janky. + // It would be nice if this could be moved to processDocument ala + // fieldCombinationErrors, but art tags are only an error if the + // thing doesn't have an artwork - which can't be determined from + // the track document on its own, thanks to inheriting contribs + // from the album. + if (findFnKey === '_artTag') { + let hasCoverArtwork = + !empty(CacheableObject.getUpdateValue(thing, 'coverArtistContribs')); + + if (thing.constructor === thingConstructors.Track) { + if (thing.album) { + hasCoverArtwork ||= + !empty(CacheableObject.getUpdateValue(thing.album, 'trackCoverArtistContribs')); + } + + if (thing.disableUniqueCoverArt) { + hasCoverArtwork = false; + } + } + + if (!hasCoverArtwork) { + nest({message: errorMessage}, ({push}) => { + push(new TypeError(`No cover artwork, so this shouldn't have art tags specified`)); + }); + + newPropertyValue = []; + break determineNewPropertyValue; + } + } + + if (findFnKey === '_commentary') { + filter( + value, {message: errorMessage}, + decorateErrorWithIndex(refs => + (refs.length === 1 + ? suppress(findFn)(refs[0]) + : filterAggregate( + refs, {message: `Errors in entry's artist references`}, + decorateErrorWithIndex(suppress(findFn))) + .aggregate + .close()))); + + // Commentary doesn't write a property value, so no need to set + // anything on `newPropertyValue`. + break determineNewPropertyValue; + } + + if (Array.isArray(value)) { + newPropertyValue = filter( + value, {message: errorMessage}, + decorateErrorWithIndex(suppress(findFn))); + break determineNewPropertyValue; + } + + nest({message: errorMessage}, + suppress(({call}) => { + try { + call(findFn, value); + } catch (error) { + newPropertyValue = null; + throw error; + } + })); + } + + if (writeProperty) { + thing[property] = newPropertyValue; + } + } + }); + } + }); + } + + return aggregate; +} diff --git a/src/data/yaml.js b/src/data/yaml.js index 77cab947..7a0643e8 100644 --- a/src/data/yaml.js +++ b/src/data/yaml.js @@ -7,22 +7,19 @@ import {inspect as nodeInspect} from 'node:util'; import yaml from 'js-yaml'; -import CacheableObject from '#cacheable-object'; import {colors, ENABLE_COLOR, logInfo, logWarn} from '#cli'; -import {bindFind, getAllFindSpecs} from '#find'; +import {reportDuplicateDirectories, filterReferenceErrors} + from '#data-checks'; import Thing from '#thing'; import thingConstructors from '#things'; -import {commentaryRegexCaseSensitive, sortByName} from '#wiki-data'; +import {sortByName} from '#wiki-data'; import { annotateErrorWithFile, atOffset, - compareArrays, - conditionallySuppressError, decorateErrorWithIndex, decorateErrorWithAnnotation, empty, - filterAggregate, filterProperties, openAggregate, showAggregate, @@ -1006,394 +1003,6 @@ export function sortWikiDataArrays(wikiData) { linkWikiDataArrays(wikiData); } -// Warn about directories which are reused across more than one of the same type -// of Thing. Directories are the unique identifier for most data objects across -// the wiki, so we have to make sure they aren't duplicated! -export function reportDuplicateDirectories(wikiData) { - const duplicateSets = []; - - for (const findSpec of Object.values(getAllFindSpecs())) { - if (!findSpec.bindTo) continue; - - const directoryPlaces = Object.create(null); - const duplicateDirectories = new Set(); - const thingData = wikiData[findSpec.bindTo]; - - for (const thing of thingData) { - if (findSpec.include && !findSpec.include(thing)) { - continue; - } - - const directories = - (findSpec.getMatchableDirectories - ? findSpec.getMatchableDirectories(thing) - : [thing.directory]); - - for (const directory of directories) { - if (directory in directoryPlaces) { - directoryPlaces[directory].push(thing); - duplicateDirectories.add(directory); - } else { - directoryPlaces[directory] = [thing]; - } - } - } - - if (empty(duplicateDirectories)) continue; - - const sortedDuplicateDirectories = - Array.from(duplicateDirectories) - .sort((a, b) => { - const aL = a.toLowerCase(); - const bL = b.toLowerCase(); - return aL < bL ? -1 : aL > bL ? 1 : 0; - }); - - for (const directory of sortedDuplicateDirectories) { - const places = directoryPlaces[directory]; - duplicateSets.push({directory, places}); - } - } - - if (empty(duplicateSets)) return; - - // Multiple find functions may effectively have duplicates across the same - // things. These only need to be reported once, because resolving one of them - // will resolve the rest, so cut out duplicate sets before reporting. - - const seenDuplicateSets = new Map(); - const deduplicateDuplicateSets = []; - - for (const set of duplicateSets) { - if (seenDuplicateSets.has(set.directory)) { - const placeLists = seenDuplicateSets.get(set.directory); - - for (const places of placeLists) { - // We're iterating globally over all duplicate directories, which may - // span multiple kinds of things, but that isn't going to cause an - // issue because we're comparing the contents by identity, anyway. - // Two artists named Foodog aren't going to match two tracks named - // Foodog. - if (compareArrays(places, set.places, {checkOrder: false})) { - continue; - } - } - - placeLists.push(set.places); - } else { - seenDuplicateSets.set(set.directory, [set.places]); - } - - deduplicateDuplicateSets.push(set); - } - - withAggregate({message: `Duplicate directories found`}, ({push}) => { - for (const {directory, places} of deduplicateDuplicateSets) { - push(new Error( - `Duplicate directory ${colors.green(`"${directory}"`)}:\n` + - places.map(thing => ` - ` + inspect(thing)).join('\n'))); - } - }); -} - -// Warn about references across data which don't match anything. This involves -// using the find() functions on all references, setting it to 'error' mode, and -// collecting everything in a structured logged (which gets logged if there are -// any errors). At the same time, we remove errored references from the thing's -// data array. -export function filterReferenceErrors(wikiData) { - const referenceSpec = [ - ['albumData', { - artistContribs: '_contrib', - coverArtistContribs: '_contrib', - trackCoverArtistContribs: '_contrib', - wallpaperArtistContribs: '_contrib', - bannerArtistContribs: '_contrib', - groups: 'group', - artTags: '_artTag', - commentary: '_commentary', - }], - - ['groupCategoryData', { - groups: 'group', - }], - - ['homepageLayout.rows', { - sourceGroup: '_homepageSourceGroup', - sourceAlbums: 'album', - }], - - ['flashData', { - contributorContribs: '_contrib', - featuredTracks: 'track', - }], - - ['flashActData', { - flashes: 'flash', - }], - - ['trackData', { - artistContribs: '_contrib', - contributorContribs: '_contrib', - coverArtistContribs: '_contrib', - referencedTracks: '_trackNotRerelease', - sampledTracks: '_trackNotRerelease', - artTags: '_artTag', - originalReleaseTrack: '_trackNotRerelease', - commentary: '_commentary', - }], - - ['wikiInfo', { - divideTrackListsByGroups: 'group', - }], - ]; - - function getNestedProp(obj, key) { - const recursive = (o, k) => - k.length === 1 ? o[k[0]] : recursive(o[k[0]], k.slice(1)); - const keys = key.split(/(?<=(?<!\\)(?:\\\\)*)\./); - return recursive(obj, keys); - } - - const boundFind = bindFind(wikiData, {mode: 'error'}); - - const findArtistOrAlias = artistRef => { - const alias = boundFind.artistAlias(artistRef, {mode: 'quiet'}); - if (alias) { - // No need to check if the original exists here. Aliases are automatically - // created from a field on the original, so the original certainly exists. - const original = alias.aliasedArtist; - throw new Error(`Reference ${colors.red(artistRef)} is to an alias, should be ${colors.green(original.name)}`); - } - - return boundFind.artist(artistRef); - }; - - const aggregate = openAggregate({message: `Errors validating between-thing references in data`}); - for (const [thingDataProp, propSpec] of referenceSpec) { - const thingData = getNestedProp(wikiData, thingDataProp); - - aggregate.nest({message: `Reference errors in ${colors.green('wikiData.' + thingDataProp)}`}, ({nest}) => { - const things = Array.isArray(thingData) ? thingData : [thingData]; - - for (const thing of things) { - nest({message: `Reference errors in ${inspect(thing)}`}, ({nest, push, filter}) => { - for (const [property, findFnKey] of Object.entries(propSpec)) { - let value = CacheableObject.getUpdateValue(thing, property); - let writeProperty = true; - - switch (findFnKey) { - case '_commentary': - if (value) { - value = - Array.from(value.matchAll(commentaryRegexCaseSensitive)) - .map(({groups}) => groups.artistReferences) - .map(text => text.split(',').map(text => text.trim())); - } - - writeProperty = false; - break; - - case '_contrib': - // Don't write out contributions - these'll be filtered out - // for content and data purposes automatically, and they're - // handy to keep around when update values get checked for - // art tags below. (Possibly no reference-related properties - // need writing, humm...) - writeProperty = false; - break; - } - - if (value === undefined) { - push(new TypeError(`Property ${colors.red(property)} isn't valid for ${colors.green(thing.constructor.name)}`)); - continue; - } - - if (value === null) { - continue; - } - - let findFn; - - switch (findFnKey) { - case '_artTag': - findFn = boundFind.artTag; - break; - - case '_commentary': - findFn = findArtistOrAlias; - break; - - case '_contrib': - findFn = contribRef => findArtistOrAlias(contribRef.who); - break; - - case '_homepageSourceGroup': - findFn = groupRef => { - if (groupRef === 'new-additions' || groupRef === 'new-releases') { - return true; - } - - return boundFind.group(groupRef); - }; - break; - - case '_trackNotRerelease': - findFn = trackRef => { - const track = boundFind.track(trackRef); - const originalRef = track && CacheableObject.getUpdateValue(track, 'originalReleaseTrack'); - - if (originalRef) { - // It's possible for the original to not actually exist, in this case. - // It should still be reported since the 'Originally Released As' field - // was present. - const original = boundFind.track(originalRef, {mode: 'quiet'}); - - // Prefer references by name, but only if it's unambiguous. - const originalByName = - (original - ? boundFind.track(original.name, {mode: 'quiet'}) - : null); - - const shouldBeMessage = - (originalByName - ? colors.green(original.name) - : original - ? colors.green('track:' + original.directory) - : colors.green(originalRef)); - - throw new Error(`Reference ${colors.red(trackRef)} is to a rerelease, should be ${shouldBeMessage}`); - } - - return track; - }; - break; - - default: - findFn = boundFind[findFnKey]; - break; - } - - const suppress = fn => conditionallySuppressError(error => { - if (property === 'sampledTracks') { - // Suppress "didn't match anything" errors in particular, just for samples. - // In hsmusic-data we have a lot of "stub" sample data which don't have - // corresponding tracks yet, so it won't be useful to report such reference - // errors until we take the time to address that. But other errors, like - // malformed reference strings or miscapitalized existing tracks, should - // still be reported, as samples of existing tracks *do* display on the - // website! - if (error.message.includes(`Didn't match anything`)) { - return true; - } - } - - return false; - }, fn); - - const {fields} = thing.constructor[Thing.yamlDocumentSpec]; - - const field = - Object.entries(fields ?? {}) - .find(([field, fieldSpec]) => fieldSpec.property === property) - ?.[0]; - - const fieldPropertyMessage = - (field - ? ` in field ${colors.green(field)}` - : ` in property ${colors.green(property)}`); - - const findFnMessage = - (findFnKey.startsWith('_') - ? `` - : ` (${colors.green('find.' + findFnKey)})`); - - const errorMessage = - (Array.isArray(value) - ? `Reference errors` + fieldPropertyMessage + findFnMessage - : `Reference error` + fieldPropertyMessage + findFnMessage); - - let newPropertyValue = value; - - determineNewPropertyValue: { - // TODO: The special-casing for artTag is obviously a bit janky. - // It would be nice if this could be moved to processDocument ala - // fieldCombinationErrors, but art tags are only an error if the - // thing doesn't have an artwork - which can't be determined from - // the track document on its own, thanks to inheriting contribs - // from the album. - if (findFnKey === '_artTag') { - let hasCoverArtwork = - !empty(CacheableObject.getUpdateValue(thing, 'coverArtistContribs')); - - if (thing.constructor === thingConstructors.Track) { - if (thing.album) { - hasCoverArtwork ||= - !empty(CacheableObject.getUpdateValue(thing.album, 'trackCoverArtistContribs')); - } - - if (thing.disableUniqueCoverArt) { - hasCoverArtwork = false; - } - } - - if (!hasCoverArtwork) { - nest({message: errorMessage}, ({push}) => { - push(new TypeError(`No cover artwork, so this shouldn't have art tags specified`)); - }); - - newPropertyValue = []; - break determineNewPropertyValue; - } - } - - if (findFnKey === '_commentary') { - filter( - value, {message: errorMessage}, - decorateErrorWithIndex(refs => - (refs.length === 1 - ? suppress(findFn)(refs[0]) - : filterAggregate( - refs, {message: `Errors in entry's artist references`}, - decorateErrorWithIndex(suppress(findFn))) - .aggregate - .close()))); - - // Commentary doesn't write a property value, so no need to set - // anything on `newPropertyValue`. - break determineNewPropertyValue; - } - - if (Array.isArray(value)) { - newPropertyValue = filter( - value, {message: errorMessage}, - decorateErrorWithIndex(suppress(findFn))); - break determineNewPropertyValue; - } - - nest({message: errorMessage}, - suppress(({call}) => { - try { - call(findFn, value); - } catch (error) { - newPropertyValue = null; - throw error; - } - })); - } - - if (writeProperty) { - thing[property] = newPropertyValue; - } - } - }); - } - }); - } - - return aggregate; -} - // Utility function for loading all wiki data from the provided YAML data // directory (e.g. the root of the hsmusic-data repository). This doesn't // provide much in the way of customization; it's meant to be used more as @@ -1401,8 +1010,11 @@ export function filterReferenceErrors(wikiData) { // where reporting info about data loading isn't as relevant as during the // main wiki build process. export async function quickLoadAllFromYAML(dataPath, { + bindFind, + getAllFindSpecs, + showAggregate: customShowAggregate = showAggregate, -} = {}) { +}) { const showAggregate = customShowAggregate; let wikiData; @@ -1424,7 +1036,7 @@ export async function quickLoadAllFromYAML(dataPath, { linkWikiDataArrays(wikiData); try { - reportDuplicateDirectories(wikiData).close(); + reportDuplicateDirectories(wikiData, {getAllFindSpecs}).close(); logInfo`No duplicate directories found. (complete data)`; } catch (error) { showAggregate(error); @@ -1432,7 +1044,7 @@ export async function quickLoadAllFromYAML(dataPath, { } try { - filterReferenceErrors(wikiData).close(); + filterReferenceErrors(wikiData, {bindFind}).close(); logInfo`No reference errors found. (complete data)`; } catch (error) { showAggregate(error); | 
