diff options
Diffstat (limited to 'src/data/yaml.js')
-rw-r--r-- | src/data/yaml.js | 692 |
1 files changed, 642 insertions, 50 deletions
diff --git a/src/data/yaml.js b/src/data/yaml.js index a1eb73fb..9a0295b8 100644 --- a/src/data/yaml.js +++ b/src/data/yaml.js @@ -8,11 +8,14 @@ import {inspect as nodeInspect} from 'node:util'; import yaml from 'js-yaml'; import {colors, ENABLE_COLOR, logInfo, logWarn} from '#cli'; +import {parseContentNodes, splitContentNodesAround} from '#replacer'; import {sortByName} from '#sort'; import Thing from '#thing'; import thingConstructors from '#things'; +import {matchContentEntries, multipleLyricsDetectionRegex} from '#wiki-data'; import { + aggregateThrows, annotateErrorWithFile, decorateErrorWithIndex, decorateErrorWithAnnotation, @@ -30,8 +33,10 @@ import { atOffset, empty, filterProperties, + getNestedProp, stitchArrays, typeAppearance, + unique, withEntries, } from '#sugar'; @@ -86,6 +91,10 @@ function makeProcessDocument(thingConstructor, { // A or B. // invalidFieldCombinations = [], + + // Bouncing function used to process subdocuments: this is a function which + // in turn calls the appropriate *result of* makeProcessDocument. + processDocument: bouncer, }) { if (!thingConstructor) { throw new Error(`Missing Thing class`); @@ -95,6 +104,10 @@ function makeProcessDocument(thingConstructor, { throw new Error(`Expected fields to be provided`); } + if (!bouncer) { + throw new Error(`Missing processDocument bouncer`); + } + const knownFields = Object.keys(fieldSpecs); const ignoredFields = @@ -142,9 +155,12 @@ function makeProcessDocument(thingConstructor, { : `document`); const aggregate = openAggregate({ + ...aggregateThrows(ProcessDocumentError), message: `Errors processing ${constructorPart}` + namePart, }); + const thing = Reflect.construct(thingConstructor, []); + const documentEntries = Object.entries(document) .filter(([field]) => !ignoredFields.includes(field)); @@ -192,13 +208,50 @@ function makeProcessDocument(thingConstructor, { const fieldValues = {}; + const subdocSymbol = Symbol('subdoc'); + const subdocLayouts = {}; + + const isSubdocToken = value => + typeof value === 'object' && + value !== null && + Object.hasOwn(value, subdocSymbol); + + const transformUtilities = { + ...thingConstructors, + + subdoc(documentType, data, { + bindInto = null, + provide = null, + } = {}) { + if (!documentType) + throw new Error(`Expected document type, got ${typeAppearance(documentType)}`); + if (!data) + throw new Error(`Expected data, got ${typeAppearance(data)}`); + if (typeof data !== 'object' || data === null) + throw new Error(`Expected data to be an object, got ${typeAppearance(data)}`); + if (typeof bindInto !== 'string' && bindInto !== null) + throw new Error(`Expected bindInto to be a string, got ${typeAppearance(bindInto)}`); + if (typeof provide !== 'object' && provide !== null) + throw new Error(`Expected provide to be an object, got ${typeAppearance(provide)}`); + + return { + [subdocSymbol]: { + documentType, + data, + bindInto, + provide, + }, + }; + }, + }; + for (const [field, documentValue] of documentEntries) { if (skippedFields.has(field)) continue; // This variable would like to certify itself as "not into capitalism". let propertyValue = (fieldSpecs[field].transform - ? fieldSpecs[field].transform(documentValue) + ? fieldSpecs[field].transform(documentValue, transformUtilities) : documentValue); // Completely blank items in a YAML list are read as null. @@ -221,10 +274,99 @@ function makeProcessDocument(thingConstructor, { } } + if (isSubdocToken(propertyValue)) { + subdocLayouts[field] = propertyValue[subdocSymbol]; + continue; + } + + if (Array.isArray(propertyValue) && propertyValue.every(isSubdocToken)) { + subdocLayouts[field] = + propertyValue + .map(token => token[subdocSymbol]); + continue; + } + fieldValues[field] = propertyValue; } - const thing = Reflect.construct(thingConstructor, []); + const subdocErrors = []; + + const followSubdocSetup = setup => { + let error = null; + + let subthing; + try { + const result = bouncer(setup.data, setup.documentType); + subthing = result.thing; + result.aggregate.close(); + } catch (caughtError) { + error = caughtError; + } + + if (subthing) { + if (setup.bindInto) { + subthing[setup.bindInto] = thing; + } + + if (setup.provide) { + Object.assign(subthing, setup.provide); + } + } + + return {error, subthing}; + }; + + for (const [field, layout] of Object.entries(subdocLayouts)) { + if (Array.isArray(layout)) { + const subthings = []; + let anySucceeded = false; + let anyFailed = false; + + for (const [index, setup] of layout.entries()) { + const {subthing, error} = followSubdocSetup(setup); + if (error) { + subdocErrors.push(new SubdocError( + {field, index}, + setup, + {cause: error})); + } + + if (subthing) { + subthings.push(subthing); + anySucceeded = true; + } else { + anyFailed = true; + } + } + + if (anySucceeded) { + fieldValues[field] = subthings; + } else if (anyFailed) { + skippedFields.add(field); + } + } else { + const setup = layout; + const {subthing, error} = followSubdocSetup(setup); + + if (error) { + subdocErrors.push(new SubdocError( + {field}, + setup, + {cause: error})); + } + + if (subthing) { + fieldValues[field] = subthing; + } else { + skippedFields.add(field); + } + } + } + + if (!empty(subdocErrors)) { + aggregate.push(new SubdocAggregateError( + subdocErrors, thingConstructor)); + } const fieldValueErrors = []; @@ -258,6 +400,8 @@ function makeProcessDocument(thingConstructor, { }); } +export class ProcessDocumentError extends AggregateError {} + export class UnknownFieldsError extends Error { constructor(fields) { super(`Unknown fields ignored: ${fields.map(field => colors.red(field)).join(', ')}`); @@ -345,12 +489,46 @@ export class SkippedFieldsSummaryError extends Error { : `${entries.length} fields`); super( - colors.bright(colors.yellow(`Altogether, skipped ${numFieldsText}:\n`)) + + colors.bright(colors.yellow(`Altogether, skipped ${numFieldsText}:`)) + '\n' + lines.join('\n') + '\n' + colors.bright(colors.yellow(`See above errors for details.`))); } } +export class SubdocError extends Error { + constructor({field, index = null}, setup, options) { + const fieldText = + (index === null + ? colors.green(`"${field}"`) + : colors.yellow(`#${index + 1}`) + ' in ' + + colors.green(`"${field}"`)); + + const constructorText = + setup.documentType.name; + + if (options.cause instanceof ProcessDocumentError) { + options.cause[Symbol.for('hsmusic.aggregate.translucent')] = true; + } + + super( + `Errors processing ${constructorText} for ${fieldText} field`, + options); + } +} + +export class SubdocAggregateError extends AggregateError { + [Symbol.for('hsmusic.aggregate.translucent')] = true; + + constructor(errors, thingConstructor) { + const constructorText = + colors.green(thingConstructor.name); + + super( + errors, + `Errors processing subdocuments for ${constructorText}`); + } +} + export function parseDate(date) { return new Date(date); } @@ -433,49 +611,39 @@ export function parseContributors(entries) { }); } -export function parseAdditionalFiles(entries) { +export function parseAdditionalFiles(entries, {subdoc, AdditionalFile}) { return parseArrayEntries(entries, item => { if (typeof item !== 'object') return item; - return { - title: item['Title'], - description: item['Description'] ?? null, - files: item['Files'], - }; + return subdoc(AdditionalFile, item, {bindInto: 'thing'}); }); } -export function parseAdditionalNames(entries) { +export function parseAdditionalNames(entries, {subdoc, AdditionalName}) { return parseArrayEntries(entries, item => { - if (typeof item === 'object' && typeof item['Name'] === 'string') - return { - name: item['Name'], - annotation: item['Annotation'] ?? null, - }; + if (typeof item === 'object') { + return subdoc(AdditionalName, item, {bindInto: 'thing'}); + } if (typeof item !== 'string') return item; const match = item.match(extractAccentRegex); if (!match) return item; - return { - name: match.groups.main, - annotation: match.groups.accent ?? null, + const document = { + ['Name']: match.groups.main, + ['Annotation']: match.groups.accent ?? null, }; + + return subdoc(AdditionalName, document, {bindInto: 'thing'}); }); } -export function parseSerieses(entries) { +export function parseSerieses(entries, {subdoc, Series}) { return parseArrayEntries(entries, item => { if (typeof item !== 'object') return item; - return { - name: item['Name'], - description: item['Description'] ?? null, - albums: item['Albums'] ?? null, - - showAlbumArtists: item['Show Album Artists'] ?? null, - }; + return subdoc(Series, item, {bindInto: 'group'}); }); } @@ -613,6 +781,172 @@ export function parseAnnotatedReferences(entries, { }); } +export function parseArtwork({ + single = false, + thingProperty = null, + dimensionsFromThingProperty = null, + fileExtensionFromThingProperty = null, + dateFromThingProperty = null, + artistContribsFromThingProperty = null, + artistContribsArtistProperty = null, + artTagsFromThingProperty = null, + referencedArtworksFromThingProperty = null, +}) { + const provide = { + thingProperty, + dimensionsFromThingProperty, + fileExtensionFromThingProperty, + dateFromThingProperty, + artistContribsFromThingProperty, + artistContribsArtistProperty, + artTagsFromThingProperty, + referencedArtworksFromThingProperty, + }; + + const parseSingleEntry = (entry, {subdoc, Artwork}) => + subdoc(Artwork, entry, {bindInto: 'thing', provide}); + + const transform = (value, ...args) => + (Array.isArray(value) + ? value.map(entry => parseSingleEntry(entry, ...args)) + : single + ? parseSingleEntry(value, ...args) + : [parseSingleEntry(value, ...args)]); + + transform.provide = provide; + + return transform; +} + +export function parseContentEntriesFromSourceText(thingClass, sourceText, {subdoc}) { + function map(matchEntry) { + let artistText = null, artistReferences = null; + + const artistTextNodes = + Array.from( + splitContentNodesAround( + parseContentNodes(matchEntry.artistText), + /\|/g)); + + const separatorIndices = + artistTextNodes + .filter(node => node.type === 'separator') + .map(node => artistTextNodes.indexOf(node)); + + if (empty(separatorIndices)) { + if (artistTextNodes.length === 1 && artistTextNodes[0].type === 'text') { + artistReferences = matchEntry.artistText; + } else { + artistText = matchEntry.artistText; + } + } else { + const firstSeparatorIndex = + separatorIndices.at(0); + + const secondSeparatorIndex = + separatorIndices.at(1) ?? + artistTextNodes.length; + + artistReferences = + matchEntry.artistText.slice( + artistTextNodes.at(0).i, + artistTextNodes.at(firstSeparatorIndex - 1).iEnd); + + artistText = + matchEntry.artistText.slice( + artistTextNodes.at(firstSeparatorIndex).iEnd, + artistTextNodes.at(secondSeparatorIndex - 1).iEnd); + } + + if (artistReferences) { + artistReferences = + artistReferences + .split(',') + .map(ref => ref.trim()); + } + + return { + 'Artists': + artistReferences, + + 'Artist Text': + artistText, + + 'Annotation': + matchEntry.annotation, + + 'Date': + matchEntry.date, + + 'Second Date': + matchEntry.secondDate, + + 'Date Kind': + matchEntry.dateKind, + + 'Access Date': + matchEntry.accessDate, + + 'Access Kind': + matchEntry.accessKind, + + 'Body': + matchEntry.body, + }; + } + + const documents = + matchContentEntries(sourceText) + .map(matchEntry => + withEntries( + map(matchEntry), + entries => entries + .filter(([key, value]) => + value !== undefined && + value !== null))); + + const subdocs = + documents.map(document => + subdoc(thingClass, document, {bindInto: 'thing'})); + + return subdocs; +} + +export function parseContentEntries(thingClass, value, {subdoc}) { + if (typeof value === 'string') { + return parseContentEntriesFromSourceText(thingClass, value, {subdoc}); + } else if (Array.isArray(value)) { + return value.map(doc => subdoc(thingClass, doc, {bindInto: 'thing'})); + } else { + return value; + } +} + +export function parseCommentary(value, {subdoc, CommentaryEntry}) { + return parseContentEntries(CommentaryEntry, value, {subdoc}); +} + +export function parseCreditingSources(value, {subdoc, CreditingSourcesEntry}) { + return parseContentEntries(CreditingSourcesEntry, value, {subdoc}); +} + +export function parseReferencingSources(value, {subdoc, ReferencingSourcesEntry}) { + return parseContentEntries(ReferencingSourcesEntry, value, {subdoc}); +} + +export function parseLyrics(value, {subdoc, LyricsEntry}) { + if ( + typeof value === 'string' && + !multipleLyricsDetectionRegex.test(value) + ) { + const document = {'Body': value}; + + return [subdoc(LyricsEntry, document, {bindInto: 'thing'})]; + } + + return parseContentEntries(LyricsEntry, value, {subdoc}); +} + // documentModes: Symbols indicating sets of behavior for loading and processing // data files. export const documentModes = { @@ -688,16 +1022,23 @@ export const documentModes = { export function getAllDataSteps() { try { thingConstructors; - } catch (error) { + } catch { throw new Error(`Thing constructors aren't ready yet, can't get all data steps`); } const steps = []; + const seenLoadingFns = new Set(); + for (const thingConstructor of Object.values(thingConstructors)) { const getSpecFn = thingConstructor[Thing.getYamlLoadingSpec]; if (!getSpecFn) continue; + // Subclasses can expose literally the same static properties + // by inheritence. We don't want to double-count those! + if (seenLoadingFns.has(getSpecFn)) continue; + seenLoadingFns.add(getSpecFn); + steps.push(getSpecFn({ documentModes, thingConstructors, @@ -890,7 +1231,7 @@ export function processThingsFromDataStep(documents, dataStep) { throw new Error(`Class "${thingClass.name}" doesn't specify Thing.yamlDocumentSpec`); } - fn = makeProcessDocument(thingClass, spec); + fn = makeProcessDocument(thingClass, {...spec, processDocument}); submap.set(thingClass, fn); } @@ -905,15 +1246,23 @@ export function processThingsFromDataStep(documents, dataStep) { const aggregate = openAggregate({message: `Errors processing documents`}); documents.forEach( - decorateErrorWithIndex(document => { + decorateErrorWithIndex((document, index) => { const {thing, aggregate: subAggregate} = processDocument(document, dataStep.documentThing); + thing[Thing.yamlSourceDocument] = document; + thing[Thing.yamlSourceDocumentPlacement] = + [documentModes.allInOne, index]; + result.push(thing); aggregate.call(subAggregate.close); })); - return {aggregate, result}; + return { + aggregate, + result, + things: result, + }; } case documentModes.oneDocumentTotal: { @@ -923,7 +1272,15 @@ export function processThingsFromDataStep(documents, dataStep) { const {thing, aggregate} = processDocument(documents[0], dataStep.documentThing); - return {aggregate, result: thing}; + thing[Thing.yamlSourceDocument] = documents[0]; + thing[Thing.yamlSourceDocumentPlacement] = + [documentModes.oneDocumentTotal]; + + return { + aggregate, + result: thing, + things: [thing], + }; } case documentModes.headerAndEntries: { @@ -938,6 +1295,10 @@ export function processThingsFromDataStep(documents, dataStep) { const {thing: headerThing, aggregate: headerAggregate} = processDocument(headerDocument, dataStep.headerDocumentThing); + headerThing[Thing.yamlSourceDocument] = headerDocument; + headerThing[Thing.yamlSourceDocumentPlacement] = + [documentModes.headerAndEntries, 'header']; + try { headerAggregate.close(); } catch (caughtError) { @@ -951,6 +1312,10 @@ export function processThingsFromDataStep(documents, dataStep) { const {thing: entryThing, aggregate: entryAggregate} = processDocument(entryDocument, dataStep.entryDocumentThing); + entryThing[Thing.yamlSourceDocument] = entryDocument; + entryThing[Thing.yamlSourceDocumentPlacement] = + [documentModes.headerAndEntries, 'entry', index]; + entryThings.push(entryThing); try { @@ -967,6 +1332,7 @@ export function processThingsFromDataStep(documents, dataStep) { header: headerThing, entries: entryThings, }, + things: [headerThing, ...entryThings], }; } @@ -980,7 +1346,15 @@ export function processThingsFromDataStep(documents, dataStep) { const {thing, aggregate} = processDocument(documents[0], dataStep.documentThing); - return {aggregate, result: thing}; + thing[Thing.yamlSourceDocument] = documents[0]; + thing[Thing.yamlSourceDocumentPlacement] = + [documentModes.onePerFile]; + + return { + aggregate, + result: thing, + things: [thing], + }; } default: @@ -1080,9 +1454,16 @@ export async function processThingsFromDataSteps(documentLists, fileLists, dataS file: files, documents: documentLists, }).map(({file, documents}) => { - const {result, aggregate} = + const {result, aggregate, things} = processThingsFromDataStep(documents, dataStep); + for (const thing of things) { + thing[Thing.yamlSourceFilename] = + path.relative(dataPath, file) + .split(path.sep) + .join(path.posix.sep); + } + const close = decorateErrorWithFileFromDataPath(aggregate.close, {dataPath}); aggregate.close = () => close({file}); @@ -1230,39 +1611,50 @@ export function linkWikiDataArrays(wikiData, {bindFind, bindReverse}) { // entries must be present here even without any properties to explicitly // link if the 'find' or 'reverse' properties will be implicitly linked - [wikiData.albumData, [ - 'albumData', - 'trackData', + ['albumData', [ + 'artworkData', 'wikiInfo', ]], - [wikiData.artTagData, [/* reverse */]], + ['artTagData', [/* reverse */]], + + ['artistData', [/* find, reverse */]], + + ['artworkData', ['artworkData']], - [wikiData.artistData, [/* find, reverse */]], + ['commentaryData', [/* find */]], - [wikiData.flashData, [ + ['creditingSourceData', [/* find */]], + + ['flashData', [ 'wikiInfo', ]], - [wikiData.flashActData, [/* find, reverse */]], + ['flashActData', [/* find, reverse */]], + + ['flashSideData', [/* find */]], + + ['groupData', [/* find, reverse */]], - [wikiData.flashSideData, [/* find */]], + ['groupCategoryData', [/* find */]], - [wikiData.groupData, [/* find, reverse */]], + ['homepageLayout.sections.rows', [/* find */]], - [wikiData.groupCategoryData, [/* find */]], + ['lyricsData', [/* find */]], - [wikiData.homepageLayout.rows, [/* find */]], + ['referencingSourceData', [/* find */]], - [wikiData.trackData, [ - 'albumData', + ['seriesData', [/* find */]], + + ['trackData', [ + 'artworkData', 'trackData', 'wikiInfo', ]], - [wikiData.trackSectionData, [/* reverse */]], + ['trackSectionData', [/* reverse */]], - [[wikiData.wikiInfo], [/* find */]], + ['wikiInfo', [/* find */]], ]); const constructorHasFindMap = new Map(); @@ -1271,8 +1663,12 @@ export function linkWikiDataArrays(wikiData, {bindFind, bindReverse}) { const boundFind = bindFind(wikiData); const boundReverse = bindReverse(wikiData); - for (const [things, keys] of linkWikiDataSpec.entries()) { - if (things === undefined) continue; + for (const [thingDataProp, keys] of linkWikiDataSpec.entries()) { + const thingData = getNestedProp(wikiData, thingDataProp); + const things = + (Array.isArray(thingData) + ? thingData.flat(Infinity) + : [thingData]); for (const thing of things) { if (thing === undefined) continue; @@ -1393,3 +1789,199 @@ export async function quickLoadAllFromYAML(dataPath, { return wikiData; } + +export function cruddilyGetAllThings(wikiData) { + const allThings = []; + + for (const v of Object.values(wikiData)) { + if (Array.isArray(v)) { + allThings.push(...v); + } else { + allThings.push(v); + } + } + + return allThings; +} + +export function getThingLayoutForFilename(filename, wikiData) { + const things = + cruddilyGetAllThings(wikiData) + .filter(thing => + thing[Thing.yamlSourceFilename] === filename); + + if (empty(things)) { + return null; + } + + const allDocumentModes = + unique(things.map(thing => + thing[Thing.yamlSourceDocumentPlacement][0])); + + if (allDocumentModes.length > 1) { + throw new Error(`More than one document mode for documents from ${filename}`); + } + + const documentMode = allDocumentModes[0]; + + switch (documentMode) { + case documentModes.allInOne: { + return { + documentMode, + things: + things.sort((a, b) => + a[Thing.yamlSourceDocumentPlacement][1] - + b[Thing.yamlSourceDocumentPlacement][1]), + }; + } + + case documentModes.oneDocumentTotal: + case documentModes.onePerFile: { + if (things.length > 1) { + throw new Error(`More than one document for ${filename}`); + } + + return { + documentMode, + thing: things[0], + }; + } + + case documentModes.headerAndEntries: { + const headerThings = + things.filter(thing => + thing[Thing.yamlSourceDocumentPlacement][1] === 'header'); + + if (headerThings.length > 1) { + throw new Error(`More than one header document for ${filename}`); + } + + return { + documentMode, + headerThing: headerThings[0] ?? null, + entryThings: + things + .filter(thing => + thing[Thing.yamlSourceDocumentPlacement][1] === 'entry') + .sort((a, b) => + a[Thing.yamlSourceDocumentPlacement][2] - + b[Thing.yamlSourceDocumentPlacement][2]), + }; + } + + default: { + return {documentMode}; + } + } +} + +export function flattenThingLayoutToDocumentOrder(layout) { + switch (layout.documentMode) { + case documentModes.oneDocumentTotal: + case documentModes.onePerFile: { + if (layout.thing) { + return [0]; + } else { + return []; + } + } + + case documentModes.allInOne: { + const indices = + layout.things + .map(thing => thing[Thing.yamlSourceDocumentPlacement][1]); + + return indices; + } + + case documentModes.headerAndEntries: { + const entryIndices = + layout.entryThings + .map(thing => thing[Thing.yamlSourceDocumentPlacement][2]) + .map(index => index + 1); + + if (layout.headerThing) { + return [0, ...entryIndices]; + } else { + return entryIndices; + } + } + + default: { + throw new Error(`Unknown document mode`); + } + } +} + +export function* splitDocumentsInYAMLSourceText(sourceText) { + // Not multiline! + const dividerRegex = /(?:\r\n|\n|^)-{3,}(?:\r\n|\n|$)/g; + + let previousDivider = ''; + + while (true) { + const {lastIndex} = dividerRegex; + const match = dividerRegex.exec(sourceText); + if (match) { + const nextDivider = match[0]; + + yield { + previousDivider, + nextDivider, + text: sourceText.slice(lastIndex, match.index), + }; + + previousDivider = nextDivider; + } else { + const nextDivider = ''; + const lineBreak = previousDivider.match(/\r?\n/)?.[0] ?? ''; + + yield { + previousDivider, + nextDivider, + text: sourceText.slice(lastIndex).replace(/(?<!\n)$/, lineBreak), + }; + + return; + } + } +} + +export function recombineDocumentsIntoYAMLSourceText(documents) { + const dividers = + unique( + documents + .flatMap(d => [d.previousDivider, d.nextDivider]) + .filter(Boolean)); + + const divider = dividers[0]; + + if (dividers.length > 1) { + // TODO: Accommodate mixed dividers as best as we can lol + logWarn`Found multiple dividers in this file, using only ${divider}`; + } + + let sourceText = ''; + + for (const document of documents) { + if (sourceText) { + sourceText += divider; + } + + sourceText += document.text; + } + + return sourceText; +} + +export function reorderDocumentsInYAMLSourceText(sourceText, order) { + const sourceDocuments = + Array.from(splitDocumentsInYAMLSourceText(sourceText)); + + const sortedDocuments = + Array.from( + order, + sourceIndex => sourceDocuments[sourceIndex]); + + return recombineDocumentsIntoYAMLSourceText(sortedDocuments); +} |