diff options
Diffstat (limited to 'src/data/yaml.js')
-rw-r--r-- | src/data/yaml.js | 406 |
1 files changed, 9 insertions, 397 deletions
diff --git a/src/data/yaml.js b/src/data/yaml.js index 77cab947..7a0643e8 100644 --- a/src/data/yaml.js +++ b/src/data/yaml.js @@ -7,22 +7,19 @@ import {inspect as nodeInspect} from 'node:util'; import yaml from 'js-yaml'; -import CacheableObject from '#cacheable-object'; import {colors, ENABLE_COLOR, logInfo, logWarn} from '#cli'; -import {bindFind, getAllFindSpecs} from '#find'; +import {reportDuplicateDirectories, filterReferenceErrors} + from '#data-checks'; import Thing from '#thing'; import thingConstructors from '#things'; -import {commentaryRegexCaseSensitive, sortByName} from '#wiki-data'; +import {sortByName} from '#wiki-data'; import { annotateErrorWithFile, atOffset, - compareArrays, - conditionallySuppressError, decorateErrorWithIndex, decorateErrorWithAnnotation, empty, - filterAggregate, filterProperties, openAggregate, showAggregate, @@ -1006,394 +1003,6 @@ export function sortWikiDataArrays(wikiData) { linkWikiDataArrays(wikiData); } -// Warn about directories which are reused across more than one of the same type -// of Thing. Directories are the unique identifier for most data objects across -// the wiki, so we have to make sure they aren't duplicated! -export function reportDuplicateDirectories(wikiData) { - const duplicateSets = []; - - for (const findSpec of Object.values(getAllFindSpecs())) { - if (!findSpec.bindTo) continue; - - const directoryPlaces = Object.create(null); - const duplicateDirectories = new Set(); - const thingData = wikiData[findSpec.bindTo]; - - for (const thing of thingData) { - if (findSpec.include && !findSpec.include(thing)) { - continue; - } - - const directories = - (findSpec.getMatchableDirectories - ? findSpec.getMatchableDirectories(thing) - : [thing.directory]); - - for (const directory of directories) { - if (directory in directoryPlaces) { - directoryPlaces[directory].push(thing); - duplicateDirectories.add(directory); - } else { - directoryPlaces[directory] = [thing]; - } - } - } - - if (empty(duplicateDirectories)) continue; - - const sortedDuplicateDirectories = - Array.from(duplicateDirectories) - .sort((a, b) => { - const aL = a.toLowerCase(); - const bL = b.toLowerCase(); - return aL < bL ? -1 : aL > bL ? 1 : 0; - }); - - for (const directory of sortedDuplicateDirectories) { - const places = directoryPlaces[directory]; - duplicateSets.push({directory, places}); - } - } - - if (empty(duplicateSets)) return; - - // Multiple find functions may effectively have duplicates across the same - // things. These only need to be reported once, because resolving one of them - // will resolve the rest, so cut out duplicate sets before reporting. - - const seenDuplicateSets = new Map(); - const deduplicateDuplicateSets = []; - - for (const set of duplicateSets) { - if (seenDuplicateSets.has(set.directory)) { - const placeLists = seenDuplicateSets.get(set.directory); - - for (const places of placeLists) { - // We're iterating globally over all duplicate directories, which may - // span multiple kinds of things, but that isn't going to cause an - // issue because we're comparing the contents by identity, anyway. - // Two artists named Foodog aren't going to match two tracks named - // Foodog. - if (compareArrays(places, set.places, {checkOrder: false})) { - continue; - } - } - - placeLists.push(set.places); - } else { - seenDuplicateSets.set(set.directory, [set.places]); - } - - deduplicateDuplicateSets.push(set); - } - - withAggregate({message: `Duplicate directories found`}, ({push}) => { - for (const {directory, places} of deduplicateDuplicateSets) { - push(new Error( - `Duplicate directory ${colors.green(`"${directory}"`)}:\n` + - places.map(thing => ` - ` + inspect(thing)).join('\n'))); - } - }); -} - -// Warn about references across data which don't match anything. This involves -// using the find() functions on all references, setting it to 'error' mode, and -// collecting everything in a structured logged (which gets logged if there are -// any errors). At the same time, we remove errored references from the thing's -// data array. -export function filterReferenceErrors(wikiData) { - const referenceSpec = [ - ['albumData', { - artistContribs: '_contrib', - coverArtistContribs: '_contrib', - trackCoverArtistContribs: '_contrib', - wallpaperArtistContribs: '_contrib', - bannerArtistContribs: '_contrib', - groups: 'group', - artTags: '_artTag', - commentary: '_commentary', - }], - - ['groupCategoryData', { - groups: 'group', - }], - - ['homepageLayout.rows', { - sourceGroup: '_homepageSourceGroup', - sourceAlbums: 'album', - }], - - ['flashData', { - contributorContribs: '_contrib', - featuredTracks: 'track', - }], - - ['flashActData', { - flashes: 'flash', - }], - - ['trackData', { - artistContribs: '_contrib', - contributorContribs: '_contrib', - coverArtistContribs: '_contrib', - referencedTracks: '_trackNotRerelease', - sampledTracks: '_trackNotRerelease', - artTags: '_artTag', - originalReleaseTrack: '_trackNotRerelease', - commentary: '_commentary', - }], - - ['wikiInfo', { - divideTrackListsByGroups: 'group', - }], - ]; - - function getNestedProp(obj, key) { - const recursive = (o, k) => - k.length === 1 ? o[k[0]] : recursive(o[k[0]], k.slice(1)); - const keys = key.split(/(?<=(?<!\\)(?:\\\\)*)\./); - return recursive(obj, keys); - } - - const boundFind = bindFind(wikiData, {mode: 'error'}); - - const findArtistOrAlias = artistRef => { - const alias = boundFind.artistAlias(artistRef, {mode: 'quiet'}); - if (alias) { - // No need to check if the original exists here. Aliases are automatically - // created from a field on the original, so the original certainly exists. - const original = alias.aliasedArtist; - throw new Error(`Reference ${colors.red(artistRef)} is to an alias, should be ${colors.green(original.name)}`); - } - - return boundFind.artist(artistRef); - }; - - const aggregate = openAggregate({message: `Errors validating between-thing references in data`}); - for (const [thingDataProp, propSpec] of referenceSpec) { - const thingData = getNestedProp(wikiData, thingDataProp); - - aggregate.nest({message: `Reference errors in ${colors.green('wikiData.' + thingDataProp)}`}, ({nest}) => { - const things = Array.isArray(thingData) ? thingData : [thingData]; - - for (const thing of things) { - nest({message: `Reference errors in ${inspect(thing)}`}, ({nest, push, filter}) => { - for (const [property, findFnKey] of Object.entries(propSpec)) { - let value = CacheableObject.getUpdateValue(thing, property); - let writeProperty = true; - - switch (findFnKey) { - case '_commentary': - if (value) { - value = - Array.from(value.matchAll(commentaryRegexCaseSensitive)) - .map(({groups}) => groups.artistReferences) - .map(text => text.split(',').map(text => text.trim())); - } - - writeProperty = false; - break; - - case '_contrib': - // Don't write out contributions - these'll be filtered out - // for content and data purposes automatically, and they're - // handy to keep around when update values get checked for - // art tags below. (Possibly no reference-related properties - // need writing, humm...) - writeProperty = false; - break; - } - - if (value === undefined) { - push(new TypeError(`Property ${colors.red(property)} isn't valid for ${colors.green(thing.constructor.name)}`)); - continue; - } - - if (value === null) { - continue; - } - - let findFn; - - switch (findFnKey) { - case '_artTag': - findFn = boundFind.artTag; - break; - - case '_commentary': - findFn = findArtistOrAlias; - break; - - case '_contrib': - findFn = contribRef => findArtistOrAlias(contribRef.who); - break; - - case '_homepageSourceGroup': - findFn = groupRef => { - if (groupRef === 'new-additions' || groupRef === 'new-releases') { - return true; - } - - return boundFind.group(groupRef); - }; - break; - - case '_trackNotRerelease': - findFn = trackRef => { - const track = boundFind.track(trackRef); - const originalRef = track && CacheableObject.getUpdateValue(track, 'originalReleaseTrack'); - - if (originalRef) { - // It's possible for the original to not actually exist, in this case. - // It should still be reported since the 'Originally Released As' field - // was present. - const original = boundFind.track(originalRef, {mode: 'quiet'}); - - // Prefer references by name, but only if it's unambiguous. - const originalByName = - (original - ? boundFind.track(original.name, {mode: 'quiet'}) - : null); - - const shouldBeMessage = - (originalByName - ? colors.green(original.name) - : original - ? colors.green('track:' + original.directory) - : colors.green(originalRef)); - - throw new Error(`Reference ${colors.red(trackRef)} is to a rerelease, should be ${shouldBeMessage}`); - } - - return track; - }; - break; - - default: - findFn = boundFind[findFnKey]; - break; - } - - const suppress = fn => conditionallySuppressError(error => { - if (property === 'sampledTracks') { - // Suppress "didn't match anything" errors in particular, just for samples. - // In hsmusic-data we have a lot of "stub" sample data which don't have - // corresponding tracks yet, so it won't be useful to report such reference - // errors until we take the time to address that. But other errors, like - // malformed reference strings or miscapitalized existing tracks, should - // still be reported, as samples of existing tracks *do* display on the - // website! - if (error.message.includes(`Didn't match anything`)) { - return true; - } - } - - return false; - }, fn); - - const {fields} = thing.constructor[Thing.yamlDocumentSpec]; - - const field = - Object.entries(fields ?? {}) - .find(([field, fieldSpec]) => fieldSpec.property === property) - ?.[0]; - - const fieldPropertyMessage = - (field - ? ` in field ${colors.green(field)}` - : ` in property ${colors.green(property)}`); - - const findFnMessage = - (findFnKey.startsWith('_') - ? `` - : ` (${colors.green('find.' + findFnKey)})`); - - const errorMessage = - (Array.isArray(value) - ? `Reference errors` + fieldPropertyMessage + findFnMessage - : `Reference error` + fieldPropertyMessage + findFnMessage); - - let newPropertyValue = value; - - determineNewPropertyValue: { - // TODO: The special-casing for artTag is obviously a bit janky. - // It would be nice if this could be moved to processDocument ala - // fieldCombinationErrors, but art tags are only an error if the - // thing doesn't have an artwork - which can't be determined from - // the track document on its own, thanks to inheriting contribs - // from the album. - if (findFnKey === '_artTag') { - let hasCoverArtwork = - !empty(CacheableObject.getUpdateValue(thing, 'coverArtistContribs')); - - if (thing.constructor === thingConstructors.Track) { - if (thing.album) { - hasCoverArtwork ||= - !empty(CacheableObject.getUpdateValue(thing.album, 'trackCoverArtistContribs')); - } - - if (thing.disableUniqueCoverArt) { - hasCoverArtwork = false; - } - } - - if (!hasCoverArtwork) { - nest({message: errorMessage}, ({push}) => { - push(new TypeError(`No cover artwork, so this shouldn't have art tags specified`)); - }); - - newPropertyValue = []; - break determineNewPropertyValue; - } - } - - if (findFnKey === '_commentary') { - filter( - value, {message: errorMessage}, - decorateErrorWithIndex(refs => - (refs.length === 1 - ? suppress(findFn)(refs[0]) - : filterAggregate( - refs, {message: `Errors in entry's artist references`}, - decorateErrorWithIndex(suppress(findFn))) - .aggregate - .close()))); - - // Commentary doesn't write a property value, so no need to set - // anything on `newPropertyValue`. - break determineNewPropertyValue; - } - - if (Array.isArray(value)) { - newPropertyValue = filter( - value, {message: errorMessage}, - decorateErrorWithIndex(suppress(findFn))); - break determineNewPropertyValue; - } - - nest({message: errorMessage}, - suppress(({call}) => { - try { - call(findFn, value); - } catch (error) { - newPropertyValue = null; - throw error; - } - })); - } - - if (writeProperty) { - thing[property] = newPropertyValue; - } - } - }); - } - }); - } - - return aggregate; -} - // Utility function for loading all wiki data from the provided YAML data // directory (e.g. the root of the hsmusic-data repository). This doesn't // provide much in the way of customization; it's meant to be used more as @@ -1401,8 +1010,11 @@ export function filterReferenceErrors(wikiData) { // where reporting info about data loading isn't as relevant as during the // main wiki build process. export async function quickLoadAllFromYAML(dataPath, { + bindFind, + getAllFindSpecs, + showAggregate: customShowAggregate = showAggregate, -} = {}) { +}) { const showAggregate = customShowAggregate; let wikiData; @@ -1424,7 +1036,7 @@ export async function quickLoadAllFromYAML(dataPath, { linkWikiDataArrays(wikiData); try { - reportDuplicateDirectories(wikiData).close(); + reportDuplicateDirectories(wikiData, {getAllFindSpecs}).close(); logInfo`No duplicate directories found. (complete data)`; } catch (error) { showAggregate(error); @@ -1432,7 +1044,7 @@ export async function quickLoadAllFromYAML(dataPath, { } try { - filterReferenceErrors(wikiData).close(); + filterReferenceErrors(wikiData, {bindFind}).close(); logInfo`No reference errors found. (complete data)`; } catch (error) { showAggregate(error); |