« get me outta code hell

refine link/ref resolving & data post-processing - hsmusic-wiki - HSMusic - static wiki software cataloguing collaborative creation
about summary refs log tree commit diff
diff options
context:
space:
mode:
author: (quasar) nebula <qznebula@protonmail.com>, 2022-01-03 23:23:12 -0400
committer: (quasar) nebula <qznebula@protonmail.com>, 2022-01-03 23:23:12 -0400
commit: 416b7261fa3c9e3d0873fdc9faf501014462e06c (patch)
tree: 2a67c7b0bdc2d4fd98d700207e33ac166165280e
parent: 22e1c9e1f57a9d2252b6bc614ca8abb805721d4d (diff)
refine link/ref resolving & data post-processing
-rwxr-xr-x  src/upd8.js  181
-rw-r--r--  src/util/find.js  83
-rw-r--r--  src/util/replacer.js  9
3 files changed, 184 insertions, 89 deletions
diff --git a/src/upd8.js b/src/upd8.js
index 12e301c..a705839 100755
--- a/src/upd8.js
+++ b/src/upd8.js
@@ -186,7 +186,6 @@ import {
     bindOpts,
     call,
     filterEmptyLines,
-    mapInPlace,
     queue,
     splitArray,
     unique,
@@ -2683,16 +2682,6 @@ async function main() {
         }
     }
 
-    {
-        for (const { references, name, album } of WD.trackData) {
-            for (const ref of references) {
-                if (!find.track(ref, {wikiData})) {
-                    logWarn`Track not found "${ref}" in ${name} (${album.name})`;
-                }
-            }
-        }
-    }
-
     WD.contributionData = Array.from(new Set([
         ...WD.trackData.flatMap(track => [...track.artists || [], ...track.contributors || [], ...track.coverArtists || []]),
         ...WD.albumData.flatMap(album => [...album.artists || [], ...album.coverArtists || [], ...album.wallpaperArtists || [], ...album.bannerArtists || []]),
@@ -2705,68 +2694,136 @@ async function main() {
     // more than once. (We 8uild a few additional links that can't 8e cre8ted
     // at initial data processing time here too.)
 
-    const filterNullArray = (parent, key) => {
-        for (const obj of parent) {
-            const array = obj[key];
-            for (let i = 0; i < array.length; i++) {
-                if (!array[i]) {
-                    const prev = array[i - 1] && array[i - 1].name;
-                    const next = array[i + 1] && array[i + 1].name;
-                    logWarn`Unexpected null in ${obj.name} (${obj.what}) (array key ${key} - prev: ${prev}, next: ${next})`;
+    const allContribSources = [];
+
+    // Collect all contrib data sources into one array, which will be processed
+    // later.
+    const collectContributors = function(thing, ...contribDataKeys) {
+        allContribSources.push(...contribDataKeys.map(key => ({
+            thing,
+            key,
+            data: thing[key]
+        })).filter(({ data }) => data?.length));
+    };
+
+    // Process in three parts:
+    // 1) collate all contrib data into one set (no duplicates)
+    // 2) convert every "who" contrib string into an actual artist object
+    // 3) filter each source (not the set!) by null who values
+    const postprocessContributors = function() {
+        const allContribData = new Set(allContribSources.flatMap(source => source.data));
+        const originalContribStrings = new Map();
+
+        for (const contrib of allContribData) {
+            originalContribStrings.set(contrib, contrib.who);
+            contrib.who = find.artist(contrib.who, {wikiData});
+        }
+
+        for (const { thing, key, data } of allContribSources) {
+            data.splice(0, data.length, ...data.filter(contrib => {
+                if (!contrib.who) {
+                    const orig = originalContribStrings.get(contrib);
+                    logWarn`Post-process: Contributor ${orig} didn't match any artist data - in ${thing.name} (key: ${key})`;
                 }
-            }
-            array.splice(0, array.length, ...array.filter(Boolean));
+            }));
         }
     };
 
-    const filterNullValue = (parent, key) => {
-        parent.splice(0, parent.length, ...parent.filter(obj => {
-            if (!obj[key]) {
-                logWarn`Unexpected null in ${obj.name} (value key ${key})`;
-                return false;
+    // Note: this mutates the original object, but NOT the actual array it's
+    // operating on. This means if the array at the original thing[key] value
+    // was also used elsewhere, it will have the original values (not the mapped
+    // and filtered ones).
+    const mapAndFilter = function(thing, key, {
+        map,
+        filter = x => x,
+        context // only used for debugging
+    }) {
+        const replacement = [];
+        for (const value of thing[key]) {
+            const newValue = map(value);
+            if (filter(newValue)) {
+                replacement.push(newValue);
+            } else {
+                let contextPart = `${thing.name}`;
+                if (context) {
+                    contextPart += ` (${context(thing)})`;
+                }
+                logWarn`Post-process: Value ${value} (${key}) didn't match any data - ${contextPart}`;
             }
-            return true;
-        }));
+        }
+        thing[key] = replacement;
     };
 
-    WD.trackData.forEach(track => mapInPlace(track.references, r => find.track(r, {wikiData})));
-    WD.trackData.forEach(track => track.aka = find.track(track.aka, {wikiData}));
-    WD.trackData.forEach(track => mapInPlace(track.artTags, t => find.tag(t, {wikiData})));
-    WD.albumData.forEach(album => mapInPlace(album.groups, g => find.group(g, {wikiData})));
-    WD.albumData.forEach(album => mapInPlace(album.artTags, t => find.tag(t, {wikiData})));
-    WD.artistAliasData.forEach(artist => artist.alias = find.artist(artist.alias, {wikiData}));
-    WD.contributionData.forEach(contrib => contrib.who = find.artist(contrib.who, {wikiData}));
-
-    filterNullArray(WD.trackData, 'references');
-    filterNullArray(WD.trackData, 'artTags');
-    filterNullArray(WD.albumData, 'groups');
-    filterNullArray(WD.albumData, 'artTags');
-    filterNullValue(WD.artistAliasData, 'alias');
-    filterNullValue(WD.contributionData, 'who');
-
-    WD.trackData.forEach(track1 => track1.referencedBy = WD.trackData.filter(track2 => track2.references.includes(track1)));
-    WD.groupData.forEach(group => group.albums = WD.albumData.filter(album => album.groups.includes(group)));
-    WD.tagData.forEach(tag => tag.things = sortByArtDate([...WD.albumData, ...WD.trackData]).filter(thing => thing.artTags.includes(tag)));
-
-    WD.groupData.forEach(group => group.category = WD.groupCategoryData.find(x => x.name === group.category));
-    WD.groupCategoryData.forEach(category => category.groups = WD.groupData.filter(x => x.category === category));
-
-    WD.trackData.forEach(track => track.otherReleases = [
-        track.aka,
-        ...WD.trackData.filter(({ aka }) => aka === track || (track.aka && aka === track.aka)),
-    ].filter(x => x && x !== track));
+    const bound = {
+        findGroup: x => find.group(x, {wikiData}),
+        findTrack: x => find.track(x, {wikiData}),
+        findTag: x => find.tag(x, {wikiData})
+    };
+
+    for (const track of WD.trackData) {
+        const context = () => track.album.name;
+        track.aka = find.track(track.aka, {wikiData});
+        mapAndFilter(track, 'references', {map: bound.findTrack, context});
+        mapAndFilter(track, 'artTags', {map: bound.findTag, context});
+        collectContributors(track, 'artists', 'contributors', 'coverArtists');
+    }
+
+    for (const track1 of WD.trackData) {
+        track1.referencedBy = WD.trackData.filter(track2 => track2.references.includes(track1));
+        track1.otherReleases = [
+            track1.aka,
+            ...WD.trackData.filter(track2 =>
+                track2.aka === track1 ||
+                (track1.aka && track2.aka === track1.aka))
+        ].filter(x => x && x !== track1);
+    }
+
+    for (const album of WD.albumData) {
+        mapAndFilter(album, 'groups', {map: bound.findGroup});
+        mapAndFilter(album, 'artTags', {map: bound.findTag});
+        collectContributors(album, 'artists', 'coverArtists', 'wallpaperArtists', 'bannerArtists');
+    }
+
+    mapAndFilter(WD, 'artistAliasData', {
+        map: artist => {
+            artist.alias = find.artist(artist.alias, {wikiData});
+            return artist;
+        },
+        filter: artist => artist.alias
+    });
+
+    for (const group of WD.groupData) {
+        group.albums = WD.albumData.filter(album => album.groups.includes(group));
+        group.category = WD.groupCategoryData.find(x => x.name === group.category);
+    }
+
+    for (const category of WD.groupCategoryData) {
+        category.groups = WD.groupData.filter(x => x.category === category);
+    }
+
+    const albumAndTrackDataSortedByArtDateMan = sortByArtDate([...WD.albumData, ...WD.trackData]);
+
+    for (const tag of WD.tagData) {
+        tag.things = albumAndTrackDataSortedByArtDateMan.filter(thing => thing.artTags.includes(tag));
+    }
 
     if (WD.wikiInfo.features.flashesAndGames) {
-        WD.flashData.forEach(flash => mapInPlace(flash.tracks, t => find.track(t, {wikiData})));
-        WD.flashData.forEach(flash => flash.act = WD.flashActData.find(act => act.name === flash.act));
-        WD.flashActData.forEach(act => act.flashes = WD.flashData.filter(flash => flash.act === act));
+        for (const flash of WD.flashData) {
+            flash.act = WD.flashActData.find(act => act.name === flash.act);
+            mapAndFilter(flash, 'tracks', {map: bound.findTrack});
+            collectContributors(flash, 'contributors');
+        }
 
-        filterNullArray(WD.flashData, 'tracks');
+        for (const act of WD.flashActData) {
+            act.flashes = WD.flashData.filter(flash => flash.act === act);
+        }
 
-        WD.trackData.forEach(track => track.flashes = WD.flashData.filter(flash => flash.tracks.includes(track)));
+        for (const track of WD.trackData) {
+            track.flashes = WD.flashData.filter(flash => flash.tracks.includes(track));
+        }
     }
 
-    WD.artistData.forEach(artist => {
+    for (const artist of WD.artistData) {
         const filterProp = (array, prop) => array.filter(thing => thing[prop]?.some(({ who }) => who === artist));
         const filterCommentary = array => array.filter(thing => thing.commentary && thing.commentary.replace(/<\/?b>/g, '').includes('<i>' + artist.name + ':</i>'));
         artist.tracks = {
@@ -2790,7 +2847,9 @@ async function main() {
                 asContributor: filterProp(WD.flashData, 'contributors')
             };
         }
-    });
+    }
+
+    postprocessContributors();
 
     WD.officialAlbumData = WD.albumData.filter(album => album.groups.some(group => group.directory === OFFICIAL_GROUP_DIRECTORY));
     WD.fandomAlbumData = WD.albumData.filter(album => album.groups.every(group => group.directory !== OFFICIAL_GROUP_DIRECTORY));
diff --git a/src/util/find.js b/src/util/find.js
index 1cbeb82..5f69bbe 100644
--- a/src/util/find.js
+++ b/src/util/find.js
@@ -1,15 +1,36 @@
 import {
+    logError,
     logWarn
 } from './cli.js';
 
-function findHelper(keys, dataProp, findFn) {
-    return (ref, {wikiData}) => {
-        if (!ref) return null;
-        ref = ref.replace(new RegExp(`^(${keys.join('|')}):`), '');
+function findHelper(keys, dataProp, findFns = {}) {
+    const byDirectory = findFns.byDirectory || matchDirectory;
+    const byName = findFns.byName || matchName;
+
+    const keyRefRegex = new RegExp(`^((${keys.join('|')}):)?(.*)$`);
+
+    return (fullRef, {wikiData}) => {
+        if (!fullRef) return null;
+        if (typeof fullRef !== 'string') {
+            throw new Error(`Got a reference that is ${typeof fullRef}, not string: ${fullRef}`);
+        }
+
+        const match = fullRef.match(keyRefRegex);
+        if (!match) {
+            throw new Error(`Malformed link reference: "${fullRef}"`);
+        }
+
+        const key = match[1];
+        const ref = match[3];
+
+        const data = wikiData[dataProp];
+
+        const found = (key
+            ? byDirectory(ref, data)
+            : byName(ref, data));
 
-        const found = findFn(ref, wikiData[dataProp]);
         if (!found) {
-            logWarn`Didn't match anything for ${ref}! (${keys.join(', ')})`;
+            logWarn`Didn't match anything for ${fullRef}!`;
         }
 
         return found;
@@ -20,35 +41,45 @@ function matchDirectory(ref, data) {
     return data.find(({ directory }) => directory === ref);
 }
 
-function matchDirectoryOrName(ref, data) {
-    let thing;
+function matchName(ref, data) {
+    const matches = data.filter(({ name }) => name.toLowerCase() === ref.toLowerCase());
 
-    thing = matchDirectory(ref, data);
-    if (thing) return thing;
+    if (matches.length > 1) {
+        logError`Multiple matches for reference "${ref}". Please resolve:`;
+        for (const match of matches) {
+            logError`- ${match.name} (${match.directory})`;
+        }
+        logError`Returning null for this reference.`;
+        return null;
+    }
 
-    thing = data.find(({ name }) => name === ref);
-    if (thing) return thing;
+    if (matches.length === 0) {
+        return null;
+    }
 
-    thing = data.find(({ name }) => name.toLowerCase() === ref.toLowerCase());
-    if (thing) {
+    const thing = matches[0];
+
+    if (ref !== thing.name) {
         logWarn`Bad capitalization: ${'\x1b[31m' + ref} -> ${'\x1b[32m' + thing.name}`;
-        return thing;
     }
 
-    return null;
+    return thing;
+}
+
+function matchTagName(ref, data) {
+    return matchName(ref.startsWith('cw: ') ? ref.slice(4) : ref, data);
 }
 
 const find = {
-    album: findHelper(['album', 'album-commentary'], 'albumData', matchDirectoryOrName),
-    artist: findHelper(['artist', 'artist-gallery'], 'artistData', matchDirectoryOrName),
-    flash: findHelper(['flash'], 'flashData', matchDirectory),
-    group: findHelper(['group', 'group-gallery'], 'groupData', matchDirectoryOrName),
-    listing: findHelper(['listing'], 'listingSpec', matchDirectory),
-    newsEntry: findHelper(['news-entry'], 'newsData', matchDirectory),
-    staticPage: findHelper(['static'], 'staticPageData', matchDirectory),
-    tag: findHelper(['tag'], 'tagData', (ref, data) =>
-        matchDirectoryOrName(ref.startsWith('cw: ') ? ref.slice(4) : ref, data)),
-    track: findHelper(['track'], 'trackData', matchDirectoryOrName)
+    album: findHelper(['album', 'album-commentary'], 'albumData'),
+    artist: findHelper(['artist', 'artist-gallery'], 'artistData'),
+    flash: findHelper(['flash'], 'flashData'),
+    group: findHelper(['group', 'group-gallery'], 'groupData'),
+    listing: findHelper(['listing'], 'listingSpec'),
+    newsEntry: findHelper(['news-entry'], 'newsData'),
+    staticPage: findHelper(['static'], 'staticPageData'),
+    tag: findHelper(['tag'], 'tagData', {byName: matchTagName}),
+    track: findHelper(['track'], 'trackData')
 };
 
 export default find;
diff --git a/src/util/replacer.js b/src/util/replacer.js
index 0c16dc8..6c52477 100644
--- a/src/util/replacer.js
+++ b/src/util/replacer.js
@@ -324,7 +324,10 @@ function evaluateTag(node, opts) {
 
     const source = input.slice(node.i, node.iEnd);
 
-    const replacerKey = node.data.replacerKey?.data || 'track';
+    const replacerKeyImplied = !node.data.replacerKey;
+    const replacerKey = (replacerKeyImplied
+        ? 'track'
+        : node.data.replacerKey.data);
 
     if (!replacerSpec[replacerKey]) {
         logWarn`The link ${source} has an invalid replacer key!`;
@@ -343,7 +346,9 @@ function evaluateTag(node, opts) {
 
     const value = (
         valueFn ? valueFn(replacerValue) :
-        findKey ? find[findKey](replacerValue, {wikiData}) :
+        findKey ? find[findKey]((replacerKeyImplied
+            ? replacerValue
+            : replacerKey + `:` + replacerValue), {wikiData}) :
         {
             directory: replacerValue,
             name: null