From 416b7261fa3c9e3d0873fdc9faf501014462e06c Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Mon, 3 Jan 2022 23:23:12 -0400 Subject: refine link/ref resolving & data post-processing --- src/upd8.js | 181 ++++++++++++++++++++++++++++++++++----------------- src/util/find.js | 83 +++++++++++++++-------- src/util/replacer.js | 9 ++- 3 files changed, 184 insertions(+), 89 deletions(-) diff --git a/src/upd8.js b/src/upd8.js index 12e301ca..a7058395 100755 --- a/src/upd8.js +++ b/src/upd8.js @@ -186,7 +186,6 @@ import { bindOpts, call, filterEmptyLines, - mapInPlace, queue, splitArray, unique, @@ -2683,16 +2682,6 @@ async function main() { } } - { - for (const { references, name, album } of WD.trackData) { - for (const ref of references) { - if (!find.track(ref, {wikiData})) { - logWarn`Track not found "${ref}" in ${name} (${album.name})`; - } - } - } - } - WD.contributionData = Array.from(new Set([ ...WD.trackData.flatMap(track => [...track.artists || [], ...track.contributors || [], ...track.coverArtists || []]), ...WD.albumData.flatMap(album => [...album.artists || [], ...album.coverArtists || [], ...album.wallpaperArtists || [], ...album.bannerArtists || []]), @@ -2705,68 +2694,136 @@ async function main() { // more than once. (We 8uild a few additional links that can't 8e cre8ted // at initial data processing time here too.) - const filterNullArray = (parent, key) => { - for (const obj of parent) { - const array = obj[key]; - for (let i = 0; i < array.length; i++) { - if (!array[i]) { - const prev = array[i - 1] && array[i - 1].name; - const next = array[i + 1] && array[i + 1].name; - logWarn`Unexpected null in ${obj.name} (${obj.what}) (array key ${key} - prev: ${prev}, next: ${next})`; + const allContribSources = []; + + // Collect all contrib data sources into one array, which will be processed + // later. + const collectContributors = function(thing, ...contribDataKeys) { + allContribSources.push(...contribDataKeys.map(key => ({ + thing, + key, + data: thing[key] + })).filter(({ data }) => data?.length)); + }; + + // Process in three parts: + // 1) collate all contrib data into one set (no duplicates) + // 2) convert every "who" contrib string into an actual artist object + // 3) filter each source (not the set!) by null who values + const postprocessContributors = function() { + const allContribData = new Set(allContribSources.flatMap(source => source.data)); + const originalContribStrings = new Map(); + + for (const contrib of allContribData) { + originalContribStrings.set(contrib, contrib.who); + contrib.who = find.artist(contrib.who, {wikiData}); + } + + for (const { thing, key, data } of allContribSources) { + data.splice(0, data.length, ...data.filter(contrib => { + if (!contrib.who) { + const orig = originalContribStrings.get(contrib); + logWarn`Post-process: Contributor ${orig} didn't match any artist data - in ${thing.name} (key: ${key})`; } - } - array.splice(0, array.length, ...array.filter(Boolean)); + })); } }; - const filterNullValue = (parent, key) => { - parent.splice(0, parent.length, ...parent.filter(obj => { - if (!obj[key]) { - logWarn`Unexpected null in ${obj.name} (value key ${key})`; - return false; + // Note: this mutates the original object, but NOT the actual array it's + // operating on. This means if the array at the original thing[key] value + // was also used elsewhere, it will have the original values (not the mapped + // and filtered ones). + const mapAndFilter = function(thing, key, { + map, + filter = x => x, + context // only used for debugging + }) { + const replacement = []; + for (const value of thing[key]) { + const newValue = map(value); + if (filter(newValue)) { + replacement.push(newValue); + } else { + let contextPart = `${thing.name}`; + if (context) { + contextPart += ` (${context(thing)})`; + } + logWarn`Post-process: Value ${value} (${key}) didn't match any data - ${contextPart}`; } - return true; - })); + } + thing[key] = replacement; }; - WD.trackData.forEach(track => mapInPlace(track.references, r => find.track(r, {wikiData}))); - WD.trackData.forEach(track => track.aka = find.track(track.aka, {wikiData})); - WD.trackData.forEach(track => mapInPlace(track.artTags, t => find.tag(t, {wikiData}))); - WD.albumData.forEach(album => mapInPlace(album.groups, g => find.group(g, {wikiData}))); - WD.albumData.forEach(album => mapInPlace(album.artTags, t => find.tag(t, {wikiData}))); - WD.artistAliasData.forEach(artist => artist.alias = find.artist(artist.alias, {wikiData})); - WD.contributionData.forEach(contrib => contrib.who = find.artist(contrib.who, {wikiData})); - - filterNullArray(WD.trackData, 'references'); - filterNullArray(WD.trackData, 'artTags'); - filterNullArray(WD.albumData, 'groups'); - filterNullArray(WD.albumData, 'artTags'); - filterNullValue(WD.artistAliasData, 'alias'); - filterNullValue(WD.contributionData, 'who'); - - WD.trackData.forEach(track1 => track1.referencedBy = WD.trackData.filter(track2 => track2.references.includes(track1))); - WD.groupData.forEach(group => group.albums = WD.albumData.filter(album => album.groups.includes(group))); - WD.tagData.forEach(tag => tag.things = sortByArtDate([...WD.albumData, ...WD.trackData]).filter(thing => thing.artTags.includes(tag))); - - WD.groupData.forEach(group => group.category = WD.groupCategoryData.find(x => x.name === group.category)); - WD.groupCategoryData.forEach(category => category.groups = WD.groupData.filter(x => x.category === category)); - - WD.trackData.forEach(track => track.otherReleases = [ - track.aka, - ...WD.trackData.filter(({ aka }) => aka === track || (track.aka && aka === track.aka)), - ].filter(x => x && x !== track)); + const bound = { + findGroup: x => find.group(x, {wikiData}), + findTrack: x => find.track(x, {wikiData}), + findTag: x => find.tag(x, {wikiData}) + }; + + for (const track of WD.trackData) { + const context = () => track.album.name; + track.aka = find.track(track.aka, {wikiData}); + mapAndFilter(track, 'references', {map: bound.findTrack, context}); + mapAndFilter(track, 'artTags', {map: bound.findTag, context}); + collectContributors(track, 'artists', 'contributors', 'coverArtists'); + } + + for (const track1 of WD.trackData) { + track1.referencedBy = WD.trackData.filter(track2 => track2.references.includes(track1)); + track1.otherReleases = [ + track1.aka, + ...WD.trackData.filter(track2 => + track2.aka === track1 || + (track1.aka && track2.aka === track1.aka)) + ].filter(x => x && x !== track1); + } + + for (const album of WD.albumData) { + mapAndFilter(album, 'groups', {map: bound.findGroup}); + mapAndFilter(album, 'artTags', {map: bound.findTag}); + collectContributors(album, 'artists', 'coverArtists', 'wallpaperArtists', 'bannerArtists'); + } + + mapAndFilter(WD, 'artistAliasData', { + map: artist => { + artist.alias = find.artist(artist.alias, {wikiData}); + return artist; + }, + filter: artist => artist.alias + }); + + for (const group of WD.groupData) { + group.albums = WD.albumData.filter(album => album.groups.includes(group)); + group.category = WD.groupCategoryData.find(x => x.name === group.category); + } + + for (const category of WD.groupCategoryData) { + category.groups = WD.groupData.filter(x => x.category === category); + } + + const albumAndTrackDataSortedByArtDateMan = sortByArtDate([...WD.albumData, ...WD.trackData]); + + for (const tag of WD.tagData) { + tag.things = albumAndTrackDataSortedByArtDateMan.filter(thing => thing.artTags.includes(tag)); + } if (WD.wikiInfo.features.flashesAndGames) { - WD.flashData.forEach(flash => mapInPlace(flash.tracks, t => find.track(t, {wikiData}))); - WD.flashData.forEach(flash => flash.act = WD.flashActData.find(act => act.name === flash.act)); - WD.flashActData.forEach(act => act.flashes = WD.flashData.filter(flash => flash.act === act)); + for (const flash of WD.flashData) { + flash.act = WD.flashActData.find(act => act.name === flash.act); + mapAndFilter(flash, 'tracks', {map: bound.findTrack}); + collectContributors(flash, 'contributors'); + } - filterNullArray(WD.flashData, 'tracks'); + for (const act of WD.flashActData) { + act.flashes = WD.flashData.filter(flash => flash.act === act); + } - WD.trackData.forEach(track => track.flashes = WD.flashData.filter(flash => flash.tracks.includes(track))); + for (const track of WD.trackData) { + track.flashes = WD.flashData.filter(flash => flash.tracks.includes(track)); + } } - WD.artistData.forEach(artist => { + for (const artist of WD.artistData) { const filterProp = (array, prop) => array.filter(thing => thing[prop]?.some(({ who }) => who === artist)); const filterCommentary = array => array.filter(thing => thing.commentary && thing.commentary.replace(/<\/?b>/g, '').includes('' + artist.name + ':')); artist.tracks = { @@ -2790,7 +2847,9 @@ async function main() { asContributor: filterProp(WD.flashData, 'contributors') }; } - }); + } + + postprocessContributors(); WD.officialAlbumData = WD.albumData.filter(album => album.groups.some(group => group.directory === OFFICIAL_GROUP_DIRECTORY)); WD.fandomAlbumData = WD.albumData.filter(album => album.groups.every(group => group.directory !== OFFICIAL_GROUP_DIRECTORY)); diff --git a/src/util/find.js b/src/util/find.js index 1cbeb82c..5f69bbec 100644 --- a/src/util/find.js +++ b/src/util/find.js @@ -1,15 +1,36 @@ import { + logError, logWarn } from './cli.js'; -function findHelper(keys, dataProp, findFn) { - return (ref, {wikiData}) => { - if (!ref) return null; - ref = ref.replace(new RegExp(`^(${keys.join('|')}):`), ''); +function findHelper(keys, dataProp, findFns = {}) { + const byDirectory = findFns.byDirectory || matchDirectory; + const byName = findFns.byName || matchName; + + const keyRefRegex = new RegExp(`^((${keys.join('|')}):)?(.*)$`); + + return (fullRef, {wikiData}) => { + if (!fullRef) return null; + if (typeof fullRef !== 'string') { + throw new Error(`Got a reference that is ${typeof fullRef}, not string: ${fullRef}`); + } + + const match = fullRef.match(keyRefRegex); + if (!match) { + throw new Error(`Malformed link reference: "${fullRef}"`); + } + + const key = match[1]; + const ref = match[3]; + + const data = wikiData[dataProp]; + + const found = (key + ? byDirectory(ref, data) + : byName(ref, data)); - const found = findFn(ref, wikiData[dataProp]); if (!found) { - logWarn`Didn't match anything for ${ref}! (${keys.join(', ')})`; + logWarn`Didn't match anything for ${fullRef}!`; } return found; @@ -20,35 +41,45 @@ function matchDirectory(ref, data) { return data.find(({ directory }) => directory === ref); } -function matchDirectoryOrName(ref, data) { - let thing; +function matchName(ref, data) { + const matches = data.filter(({ name }) => name.toLowerCase() === ref.toLowerCase()); - thing = matchDirectory(ref, data); - if (thing) return thing; + if (matches.length > 1) { + logError`Multiple matches for reference "${ref}". Please resolve:`; + for (const match of matches) { + logError`- ${match.name} (${match.directory})`; + } + logError`Returning null for this reference.`; + return null; + } - thing = data.find(({ name }) => name === ref); - if (thing) return thing; + if (matches.length === 0) { + return null; + } - thing = data.find(({ name }) => name.toLowerCase() === ref.toLowerCase()); - if (thing) { + const thing = matches[0]; + + if (ref !== thing.name) { logWarn`Bad capitalization: ${'\x1b[31m' + ref} -> ${'\x1b[32m' + thing.name}`; - return thing; } - return null; + return thing; +} + +function matchTagName(ref, data) { + return matchName(ref.startsWith('cw: ') ? ref.slice(4) : ref, data); } const find = { - album: findHelper(['album', 'album-commentary'], 'albumData', matchDirectoryOrName), - artist: findHelper(['artist', 'artist-gallery'], 'artistData', matchDirectoryOrName), - flash: findHelper(['flash'], 'flashData', matchDirectory), - group: findHelper(['group', 'group-gallery'], 'groupData', matchDirectoryOrName), - listing: findHelper(['listing'], 'listingSpec', matchDirectory), - newsEntry: findHelper(['news-entry'], 'newsData', matchDirectory), - staticPage: findHelper(['static'], 'staticPageData', matchDirectory), - tag: findHelper(['tag'], 'tagData', (ref, data) => - matchDirectoryOrName(ref.startsWith('cw: ') ? ref.slice(4) : ref, data)), - track: findHelper(['track'], 'trackData', matchDirectoryOrName) + album: findHelper(['album', 'album-commentary'], 'albumData'), + artist: findHelper(['artist', 'artist-gallery'], 'artistData'), + flash: findHelper(['flash'], 'flashData'), + group: findHelper(['group', 'group-gallery'], 'groupData'), + listing: findHelper(['listing'], 'listingSpec'), + newsEntry: findHelper(['news-entry'], 'newsData'), + staticPage: findHelper(['static'], 'staticPageData'), + tag: findHelper(['tag'], 'tagData', {byName: matchTagName}), + track: findHelper(['track'], 'trackData') }; export default find; diff --git a/src/util/replacer.js b/src/util/replacer.js index 0c16dc8b..6c524778 100644 --- a/src/util/replacer.js +++ b/src/util/replacer.js @@ -324,7 +324,10 @@ function evaluateTag(node, opts) { const source = input.slice(node.i, node.iEnd); - const replacerKey = node.data.replacerKey?.data || 'track'; + const replacerKeyImplied = !node.data.replacerKey; + const replacerKey = (replacerKeyImplied + ? 'track' + : node.data.replacerKey.data); if (!replacerSpec[replacerKey]) { logWarn`The link ${source} has an invalid replacer key!`; @@ -343,7 +346,9 @@ function evaluateTag(node, opts) { const value = ( valueFn ? valueFn(replacerValue) : - findKey ? find[findKey](replacerValue, {wikiData}) : + findKey ? find[findKey]((replacerKeyImplied + ? replacerValue + : replacerKey + `:` + replacerValue), {wikiData}) : { directory: replacerValue, name: null -- cgit 1.3.0-6-gf8a5