« get me outta code hell

data, yaml: move two functions into new #data-checks import - hsmusic-wiki - HSMusic - static wiki software cataloguing collaborative creation
about summary refs log tree commit diff
diff options
context:
space:
mode:
author: (quasar) nebula <qznebula@protonmail.com> 2024-02-16 09:04:25 -0400
committer: (quasar) nebula <qznebula@protonmail.com> 2024-02-16 10:28:02 -0400
commit: 2b0a5231170049bc4fd7f44009c9f32b1404d5a5 (patch)
tree: 04bc194adc000d5b793f384bf3aee953972f2f4c
parent: 6f8082142544548303681109d7d30073a0ae069a (diff)
data, yaml: move two functions into new #data-checks import
Due to import time shenanigans, these are provided bindFind and
getAllFindSpecs externally. Sorry!!
-rw-r--r-- data-tests/index.js 4
-rw-r--r-- package.json 1
-rw-r--r-- src/data/checks.js 415
-rw-r--r-- src/data/yaml.js 406
-rwxr-xr-x src/upd8.js 18
5 files changed, 437 insertions, 407 deletions
diff --git a/data-tests/index.js b/data-tests/index.js
index d077090..3901db0 100644
--- a/data-tests/index.js
+++ b/data-tests/index.js
@@ -4,6 +4,7 @@ import {fileURLToPath} from 'node:url';
 import chokidar from 'chokidar';
 
 import {colors, logError, logInfo, logWarn, parseOptions} from '#cli';
+import {bindFind, getAllFindSpecs} from '#find';
 import {isMain} from '#node-utils';
 import {getContextAssignments} from '#repl';
 import {bindOpts, showAggregate} from '#sugar';
@@ -32,6 +33,9 @@ async function main() {
   const watcher = chokidar.watch(metaDirname);
 
   const wikiData = await quickLoadAllFromYAML(dataPath, {
+    bindFind,
+    getAllFindSpecs,
+
     showAggregate: bindOpts(showAggregate, {
       showTraces: false,
     }),
diff --git a/package.json b/package.json
index 83e16be..48ffa46 100644
--- a/package.json
+++ b/package.json
@@ -25,6 +25,7 @@
         "#content-dependencies": "./src/content/dependencies/index.js",
         "#content-function": "./src/content-function.js",
         "#cli": "./src/util/cli.js",
+        "#data-checks": "./src/data/checks.js",
         "#external-links": "./src/util/external-links.js",
         "#find": "./src/find.js",
         "#html": "./src/util/html.js",
diff --git a/src/data/checks.js b/src/data/checks.js
new file mode 100644
index 0000000..25e94aa
--- /dev/null
+++ b/src/data/checks.js
@@ -0,0 +1,415 @@
+// checks.js - general validation and error/warning reporting for data objects
+
+import {inspect as nodeInspect} from 'node:util';
+import {colors, ENABLE_COLOR} from '#cli';
+
+import CacheableObject from '#cacheable-object';
+import Thing from '#thing';
+import thingConstructors from '#things';
+import {commentaryRegexCaseSensitive} from '#wiki-data';
+
+import {
+  compareArrays,
+  conditionallySuppressError,
+  decorateErrorWithIndex,
+  empty,
+  filterAggregate,
+  openAggregate,
+  withAggregate,
+} from '#sugar';
+
+function inspect(value, opts = {}) {
+  return nodeInspect(value, {colors: ENABLE_COLOR, ...opts});
+}
+
+// Warn about directories which are reused across more than one of the same type
+// of Thing. Directories are the unique identifier for most data objects across
+// the wiki, so we have to make sure they aren't duplicated!
+export function reportDuplicateDirectories(wikiData, {
+  getAllFindSpecs,
+}) {
+  const duplicateSets = [];
+
+  for (const findSpec of Object.values(getAllFindSpecs())) {
+    if (!findSpec.bindTo) continue;
+
+    const directoryPlaces = Object.create(null);
+    const duplicateDirectories = new Set();
+    const thingData = wikiData[findSpec.bindTo];
+
+    for (const thing of thingData) {
+      if (findSpec.include && !findSpec.include(thing)) {
+        continue;
+      }
+
+      const directories =
+        (findSpec.getMatchableDirectories
+          ? findSpec.getMatchableDirectories(thing)
+          : [thing.directory]);
+
+      for (const directory of directories) {
+        if (directory in directoryPlaces) {
+          directoryPlaces[directory].push(thing);
+          duplicateDirectories.add(directory);
+        } else {
+          directoryPlaces[directory] = [thing];
+        }
+      }
+    }
+
+    if (empty(duplicateDirectories)) continue;
+
+    const sortedDuplicateDirectories =
+      Array.from(duplicateDirectories)
+        .sort((a, b) => {
+          const aL = a.toLowerCase();
+          const bL = b.toLowerCase();
+          return aL < bL ? -1 : aL > bL ? 1 : 0;
+        });
+
+    for (const directory of sortedDuplicateDirectories) {
+      const places = directoryPlaces[directory];
+      duplicateSets.push({directory, places});
+    }
+  }
+
+  if (empty(duplicateSets)) return;
+
+  // Multiple find functions may effectively have duplicates across the same
+  // things. These only need to be reported once, because resolving one of them
+  // will resolve the rest, so cut out duplicate sets before reporting.
+
+  const seenDuplicateSets = new Map();
+  const deduplicateDuplicateSets = [];
+
+  for (const set of duplicateSets) {
+    if (seenDuplicateSets.has(set.directory)) {
+      const placeLists = seenDuplicateSets.get(set.directory);
+
+      for (const places of placeLists) {
+        // We're iterating globally over all duplicate directories, which may
+        // span multiple kinds of things, but that isn't going to cause an
+        // issue because we're comparing the contents by identity, anyway.
+        // Two artists named Foodog aren't going to match two tracks named
+        // Foodog.
+        if (compareArrays(places, set.places, {checkOrder: false})) {
+          continue;
+        }
+      }
+
+      placeLists.push(set.places);
+    } else {
+      seenDuplicateSets.set(set.directory, [set.places]);
+    }
+
+    deduplicateDuplicateSets.push(set);
+  }
+
+  withAggregate({message: `Duplicate directories found`}, ({push}) => {
+    for (const {directory, places} of deduplicateDuplicateSets) {
+      push(new Error(
+        `Duplicate directory ${colors.green(`"${directory}"`)}:\n` +
+        places.map(thing => ` - ` + inspect(thing)).join('\n')));
+    }
+  });
+}
+
+// Warn about references across data which don't match anything.  This involves
+// using the find() functions on all references, setting it to 'error' mode, and
+// collecting everything in a structured log (which gets logged if there are
+// any errors). At the same time, we remove errored references from the thing's
+// data array.
+export function filterReferenceErrors(wikiData, {
+  bindFind,
+}) {
+  const referenceSpec = [
+    ['albumData', {
+      artistContribs: '_contrib',
+      coverArtistContribs: '_contrib',
+      trackCoverArtistContribs: '_contrib',
+      wallpaperArtistContribs: '_contrib',
+      bannerArtistContribs: '_contrib',
+      groups: 'group',
+      artTags: '_artTag',
+      commentary: '_commentary',
+    }],
+
+    ['groupCategoryData', {
+      groups: 'group',
+    }],
+
+    ['homepageLayout.rows', {
+      sourceGroup: '_homepageSourceGroup',
+      sourceAlbums: 'album',
+    }],
+
+    ['flashData', {
+      contributorContribs: '_contrib',
+      featuredTracks: 'track',
+    }],
+
+    ['flashActData', {
+      flashes: 'flash',
+    }],
+
+    ['trackData', {
+      artistContribs: '_contrib',
+      contributorContribs: '_contrib',
+      coverArtistContribs: '_contrib',
+      referencedTracks: '_trackNotRerelease',
+      sampledTracks: '_trackNotRerelease',
+      artTags: '_artTag',
+      originalReleaseTrack: '_trackNotRerelease',
+      commentary: '_commentary',
+    }],
+
+    ['wikiInfo', {
+      divideTrackListsByGroups: 'group',
+    }],
+  ];
+
+  function getNestedProp(obj, key) {
+    const recursive = (o, k) =>
+      k.length === 1 ? o[k[0]] : recursive(o[k[0]], k.slice(1));
+    const keys = key.split(/(?<=(?<!\\)(?:\\\\)*)\./);
+    return recursive(obj, keys);
+  }
+
+  const boundFind = bindFind(wikiData, {mode: 'error'});
+
+  const findArtistOrAlias = artistRef => {
+    const alias = boundFind.artistAlias(artistRef, {mode: 'quiet'});
+    if (alias) {
+      // No need to check if the original exists here. Aliases are automatically
+      // created from a field on the original, so the original certainly exists.
+      const original = alias.aliasedArtist;
+      throw new Error(`Reference ${colors.red(artistRef)} is to an alias, should be ${colors.green(original.name)}`);
+    }
+
+    return boundFind.artist(artistRef);
+  };
+
+  const aggregate = openAggregate({message: `Errors validating between-thing references in data`});
+  for (const [thingDataProp, propSpec] of referenceSpec) {
+    const thingData = getNestedProp(wikiData, thingDataProp);
+
+    aggregate.nest({message: `Reference errors in ${colors.green('wikiData.' + thingDataProp)}`}, ({nest}) => {
+      const things = Array.isArray(thingData) ? thingData : [thingData];
+
+      for (const thing of things) {
+        nest({message: `Reference errors in ${inspect(thing)}`}, ({nest, push, filter}) => {
+          for (const [property, findFnKey] of Object.entries(propSpec)) {
+            let value = CacheableObject.getUpdateValue(thing, property);
+            let writeProperty = true;
+
+            switch (findFnKey) {
+              case '_commentary':
+                if (value) {
+                  value =
+                    Array.from(value.matchAll(commentaryRegexCaseSensitive))
+                      .map(({groups}) => groups.artistReferences)
+                      .map(text => text.split(',').map(text => text.trim()));
+                }
+
+                writeProperty = false;
+                break;
+
+              case '_contrib':
+                // Don't write out contributions - these'll be filtered out
+                // for content and data purposes automatically, and they're
+                // handy to keep around when update values get checked for
+                // art tags below. (Possibly no reference-related properties
+                // need writing, humm...)
+                writeProperty = false;
+                break;
+            }
+
+            if (value === undefined) {
+              push(new TypeError(`Property ${colors.red(property)} isn't valid for ${colors.green(thing.constructor.name)}`));
+              continue;
+            }
+
+            if (value === null) {
+              continue;
+            }
+
+            let findFn;
+
+            switch (findFnKey) {
+              case '_artTag':
+                findFn = boundFind.artTag;
+                break;
+
+              case '_commentary':
+                findFn = findArtistOrAlias;
+                break;
+
+              case '_contrib':
+                findFn = contribRef => findArtistOrAlias(contribRef.who);
+                break;
+
+              case '_homepageSourceGroup':
+                findFn = groupRef => {
+                  if (groupRef === 'new-additions' || groupRef === 'new-releases') {
+                    return true;
+                  }
+
+                  return boundFind.group(groupRef);
+                };
+                break;
+
+              case '_trackNotRerelease':
+                findFn = trackRef => {
+                  const track = boundFind.track(trackRef);
+                  const originalRef = track && CacheableObject.getUpdateValue(track, 'originalReleaseTrack');
+
+                  if (originalRef) {
+                    // It's possible for the original to not actually exist, in this case.
+                    // It should still be reported since the 'Originally Released As' field
+                    // was present.
+                    const original = boundFind.track(originalRef, {mode: 'quiet'});
+
+                    // Prefer references by name, but only if it's unambiguous.
+                    const originalByName =
+                      (original
+                        ? boundFind.track(original.name, {mode: 'quiet'})
+                        : null);
+
+                    const shouldBeMessage =
+                      (originalByName
+                        ? colors.green(original.name)
+                     : original
+                        ? colors.green('track:' + original.directory)
+                        : colors.green(originalRef));
+
+                    throw new Error(`Reference ${colors.red(trackRef)} is to a rerelease, should be ${shouldBeMessage}`);
+                  }
+
+                  return track;
+                };
+                break;
+
+              default:
+                findFn = boundFind[findFnKey];
+                break;
+            }
+
+            const suppress = fn => conditionallySuppressError(error => {
+              if (property === 'sampledTracks') {
+                // Suppress "didn't match anything" errors in particular, just for samples.
+                // In hsmusic-data we have a lot of "stub" sample data which don't have
+                // corresponding tracks yet, so it won't be useful to report such reference
+                // errors until we take the time to address that. But other errors, like
+                // malformed reference strings or miscapitalized existing tracks, should
+                // still be reported, as samples of existing tracks *do* display on the
+                // website!
+                if (error.message.includes(`Didn't match anything`)) {
+                  return true;
+                }
+              }
+
+              return false;
+            }, fn);
+
+            const {fields} = thing.constructor[Thing.yamlDocumentSpec];
+
+            const field =
+              Object.entries(fields ?? {})
+                .find(([field, fieldSpec]) => fieldSpec.property === property)
+                ?.[0];
+
+            const fieldPropertyMessage =
+              (field
+                ? ` in field ${colors.green(field)}`
+                : ` in property ${colors.green(property)}`);
+
+            const findFnMessage =
+              (findFnKey.startsWith('_')
+                ? ``
+                : ` (${colors.green('find.' + findFnKey)})`);
+
+            const errorMessage =
+              (Array.isArray(value)
+                ? `Reference errors` + fieldPropertyMessage + findFnMessage
+                : `Reference error` + fieldPropertyMessage + findFnMessage);
+
+            let newPropertyValue = value;
+
+            determineNewPropertyValue: {
+              // TODO: The special-casing for artTag is obviously a bit janky.
+              // It would be nice if this could be moved to processDocument ala
+              // fieldCombinationErrors, but art tags are only an error if the
+              // thing doesn't have an artwork - which can't be determined from
+              // the track document on its own, thanks to inheriting contribs
+              // from the album.
+              if (findFnKey === '_artTag') {
+                let hasCoverArtwork =
+                  !empty(CacheableObject.getUpdateValue(thing, 'coverArtistContribs'));
+
+                if (thing.constructor === thingConstructors.Track) {
+                  if (thing.album) {
+                    hasCoverArtwork ||=
+                      !empty(CacheableObject.getUpdateValue(thing.album, 'trackCoverArtistContribs'));
+                  }
+
+                  if (thing.disableUniqueCoverArt) {
+                    hasCoverArtwork = false;
+                  }
+                }
+
+                if (!hasCoverArtwork) {
+                  nest({message: errorMessage}, ({push}) => {
+                    push(new TypeError(`No cover artwork, so this shouldn't have art tags specified`));
+                  });
+
+                  newPropertyValue = [];
+                  break determineNewPropertyValue;
+                }
+              }
+
+              if (findFnKey === '_commentary') {
+                filter(
+                  value, {message: errorMessage},
+                  decorateErrorWithIndex(refs =>
+                    (refs.length === 1
+                      ? suppress(findFn)(refs[0])
+                      : filterAggregate(
+                          refs, {message: `Errors in entry's artist references`},
+                          decorateErrorWithIndex(suppress(findFn)))
+                            .aggregate
+                            .close())));
+
+                // Commentary doesn't write a property value, so no need to set
+                // anything on `newPropertyValue`.
+                break determineNewPropertyValue;
+              }
+
+              if (Array.isArray(value)) {
+                newPropertyValue = filter(
+                  value, {message: errorMessage},
+                  decorateErrorWithIndex(suppress(findFn)));
+                break determineNewPropertyValue;
+              }
+
+              nest({message: errorMessage},
+                suppress(({call}) => {
+                  try {
+                    call(findFn, value);
+                  } catch (error) {
+                    newPropertyValue = null;
+                    throw error;
+                  }
+                }));
+            }
+
+            if (writeProperty) {
+              thing[property] = newPropertyValue;
+            }
+          }
+        });
+      }
+    });
+  }
+
+  return aggregate;
+}
diff --git a/src/data/yaml.js b/src/data/yaml.js
index 77cab94..7a0643e 100644
--- a/src/data/yaml.js
+++ b/src/data/yaml.js
@@ -7,22 +7,19 @@ import {inspect as nodeInspect} from 'node:util';
 
 import yaml from 'js-yaml';
 
-import CacheableObject from '#cacheable-object';
 import {colors, ENABLE_COLOR, logInfo, logWarn} from '#cli';
-import {bindFind, getAllFindSpecs} from '#find';
+import {reportDuplicateDirectories, filterReferenceErrors}
+  from '#data-checks';
 import Thing from '#thing';
 import thingConstructors from '#things';
-import {commentaryRegexCaseSensitive, sortByName} from '#wiki-data';
+import {sortByName} from '#wiki-data';
 
 import {
   annotateErrorWithFile,
   atOffset,
-  compareArrays,
-  conditionallySuppressError,
   decorateErrorWithIndex,
   decorateErrorWithAnnotation,
   empty,
-  filterAggregate,
   filterProperties,
   openAggregate,
   showAggregate,
@@ -1006,394 +1003,6 @@ export function sortWikiDataArrays(wikiData) {
   linkWikiDataArrays(wikiData);
 }
 
-// Warn about directories which are reused across more than one of the same type
-// of Thing. Directories are the unique identifier for most data objects across
-// the wiki, so we have to make sure they aren't duplicated!
-export function reportDuplicateDirectories(wikiData) {
-  const duplicateSets = [];
-
-  for (const findSpec of Object.values(getAllFindSpecs())) {
-    if (!findSpec.bindTo) continue;
-
-    const directoryPlaces = Object.create(null);
-    const duplicateDirectories = new Set();
-    const thingData = wikiData[findSpec.bindTo];
-
-    for (const thing of thingData) {
-      if (findSpec.include && !findSpec.include(thing)) {
-        continue;
-      }
-
-      const directories =
-        (findSpec.getMatchableDirectories
-          ? findSpec.getMatchableDirectories(thing)
-          : [thing.directory]);
-
-      for (const directory of directories) {
-        if (directory in directoryPlaces) {
-          directoryPlaces[directory].push(thing);
-          duplicateDirectories.add(directory);
-        } else {
-          directoryPlaces[directory] = [thing];
-        }
-      }
-    }
-
-    if (empty(duplicateDirectories)) continue;
-
-    const sortedDuplicateDirectories =
-      Array.from(duplicateDirectories)
-        .sort((a, b) => {
-          const aL = a.toLowerCase();
-          const bL = b.toLowerCase();
-          return aL < bL ? -1 : aL > bL ? 1 : 0;
-        });
-
-    for (const directory of sortedDuplicateDirectories) {
-      const places = directoryPlaces[directory];
-      duplicateSets.push({directory, places});
-    }
-  }
-
-  if (empty(duplicateSets)) return;
-
-  // Multiple find functions may effectively have duplicates across the same
-  // things. These only need to be reported once, because resolving one of them
-  // will resolve the rest, so cut out duplicate sets before reporting.
-
-  const seenDuplicateSets = new Map();
-  const deduplicateDuplicateSets = [];
-
-  for (const set of duplicateSets) {
-    if (seenDuplicateSets.has(set.directory)) {
-      const placeLists = seenDuplicateSets.get(set.directory);
-
-      for (const places of placeLists) {
-        // We're iterating globally over all duplicate directories, which may
-        // span multiple kinds of things, but that isn't going to cause an
-        // issue because we're comparing the contents by identity, anyway.
-        // Two artists named Foodog aren't going to match two tracks named
-        // Foodog.
-        if (compareArrays(places, set.places, {checkOrder: false})) {
-          continue;
-        }
-      }
-
-      placeLists.push(set.places);
-    } else {
-      seenDuplicateSets.set(set.directory, [set.places]);
-    }
-
-    deduplicateDuplicateSets.push(set);
-  }
-
-  withAggregate({message: `Duplicate directories found`}, ({push}) => {
-    for (const {directory, places} of deduplicateDuplicateSets) {
-      push(new Error(
-        `Duplicate directory ${colors.green(`"${directory}"`)}:\n` +
-        places.map(thing => ` - ` + inspect(thing)).join('\n')));
-    }
-  });
-}
-
-// Warn about references across data which don't match anything.  This involves
-// using the find() functions on all references, setting it to 'error' mode, and
-// collecting everything in a structured logged (which gets logged if there are
-// any errors). At the same time, we remove errored references from the thing's
-// data array.
-export function filterReferenceErrors(wikiData) {
-  const referenceSpec = [
-    ['albumData', {
-      artistContribs: '_contrib',
-      coverArtistContribs: '_contrib',
-      trackCoverArtistContribs: '_contrib',
-      wallpaperArtistContribs: '_contrib',
-      bannerArtistContribs: '_contrib',
-      groups: 'group',
-      artTags: '_artTag',
-      commentary: '_commentary',
-    }],
-
-    ['groupCategoryData', {
-      groups: 'group',
-    }],
-
-    ['homepageLayout.rows', {
-      sourceGroup: '_homepageSourceGroup',
-      sourceAlbums: 'album',
-    }],
-
-    ['flashData', {
-      contributorContribs: '_contrib',
-      featuredTracks: 'track',
-    }],
-
-    ['flashActData', {
-      flashes: 'flash',
-    }],
-
-    ['trackData', {
-      artistContribs: '_contrib',
-      contributorContribs: '_contrib',
-      coverArtistContribs: '_contrib',
-      referencedTracks: '_trackNotRerelease',
-      sampledTracks: '_trackNotRerelease',
-      artTags: '_artTag',
-      originalReleaseTrack: '_trackNotRerelease',
-      commentary: '_commentary',
-    }],
-
-    ['wikiInfo', {
-      divideTrackListsByGroups: 'group',
-    }],
-  ];
-
-  function getNestedProp(obj, key) {
-    const recursive = (o, k) =>
-      k.length === 1 ? o[k[0]] : recursive(o[k[0]], k.slice(1));
-    const keys = key.split(/(?<=(?<!\\)(?:\\\\)*)\./);
-    return recursive(obj, keys);
-  }
-
-  const boundFind = bindFind(wikiData, {mode: 'error'});
-
-  const findArtistOrAlias = artistRef => {
-    const alias = boundFind.artistAlias(artistRef, {mode: 'quiet'});
-    if (alias) {
-      // No need to check if the original exists here. Aliases are automatically
-      // created from a field on the original, so the original certainly exists.
-      const original = alias.aliasedArtist;
-      throw new Error(`Reference ${colors.red(artistRef)} is to an alias, should be ${colors.green(original.name)}`);
-    }
-
-    return boundFind.artist(artistRef);
-  };
-
-  const aggregate = openAggregate({message: `Errors validating between-thing references in data`});
-  for (const [thingDataProp, propSpec] of referenceSpec) {
-    const thingData = getNestedProp(wikiData, thingDataProp);
-
-    aggregate.nest({message: `Reference errors in ${colors.green('wikiData.' + thingDataProp)}`}, ({nest}) => {
-      const things = Array.isArray(thingData) ? thingData : [thingData];
-
-      for (const thing of things) {
-        nest({message: `Reference errors in ${inspect(thing)}`}, ({nest, push, filter}) => {
-          for (const [property, findFnKey] of Object.entries(propSpec)) {
-            let value = CacheableObject.getUpdateValue(thing, property);
-            let writeProperty = true;
-
-            switch (findFnKey) {
-              case '_commentary':
-                if (value) {
-                  value =
-                    Array.from(value.matchAll(commentaryRegexCaseSensitive))
-                      .map(({groups}) => groups.artistReferences)
-                      .map(text => text.split(',').map(text => text.trim()));
-                }
-
-                writeProperty = false;
-                break;
-
-              case '_contrib':
-                // Don't write out contributions - these'll be filtered out
-                // for content and data purposes automatically, and they're
-                // handy to keep around when update values get checked for
-                // art tags below. (Possibly no reference-related properties
-                // need writing, humm...)
-                writeProperty = false;
-                break;
-            }
-
-            if (value === undefined) {
-              push(new TypeError(`Property ${colors.red(property)} isn't valid for ${colors.green(thing.constructor.name)}`));
-              continue;
-            }
-
-            if (value === null) {
-              continue;
-            }
-
-            let findFn;
-
-            switch (findFnKey) {
-              case '_artTag':
-                findFn = boundFind.artTag;
-                break;
-
-              case '_commentary':
-                findFn = findArtistOrAlias;
-                break;
-
-              case '_contrib':
-                findFn = contribRef => findArtistOrAlias(contribRef.who);
-                break;
-
-              case '_homepageSourceGroup':
-                findFn = groupRef => {
-                  if (groupRef === 'new-additions' || groupRef === 'new-releases') {
-                    return true;
-                  }
-
-                  return boundFind.group(groupRef);
-                };
-                break;
-
-              case '_trackNotRerelease':
-                findFn = trackRef => {
-                  const track = boundFind.track(trackRef);
-                  const originalRef = track && CacheableObject.getUpdateValue(track, 'originalReleaseTrack');
-
-                  if (originalRef) {
-                    // It's possible for the original to not actually exist, in this case.
-                    // It should still be reported since the 'Originally Released As' field
-                    // was present.
-                    const original = boundFind.track(originalRef, {mode: 'quiet'});
-
-                    // Prefer references by name, but only if it's unambiguous.
-                    const originalByName =
-                      (original
-                        ? boundFind.track(original.name, {mode: 'quiet'})
-                        : null);
-
-                    const shouldBeMessage =
-                      (originalByName
-                        ? colors.green(original.name)
-                     : original
-                        ? colors.green('track:' + original.directory)
-                        : colors.green(originalRef));
-
-                    throw new Error(`Reference ${colors.red(trackRef)} is to a rerelease, should be ${shouldBeMessage}`);
-                  }
-
-                  return track;
-                };
-                break;
-
-              default:
-                findFn = boundFind[findFnKey];
-                break;
-            }
-
-            const suppress = fn => conditionallySuppressError(error => {
-              if (property === 'sampledTracks') {
-                // Suppress "didn't match anything" errors in particular, just for samples.
-                // In hsmusic-data we have a lot of "stub" sample data which don't have
-                // corresponding tracks yet, so it won't be useful to report such reference
-                // errors until we take the time to address that. But other errors, like
-                // malformed reference strings or miscapitalized existing tracks, should
-                // still be reported, as samples of existing tracks *do* display on the
-                // website!
-                if (error.message.includes(`Didn't match anything`)) {
-                  return true;
-                }
-              }
-
-              return false;
-            }, fn);
-
-            const {fields} = thing.constructor[Thing.yamlDocumentSpec];
-
-            const field =
-              Object.entries(fields ?? {})
-                .find(([field, fieldSpec]) => fieldSpec.property === property)
-                ?.[0];
-
-            const fieldPropertyMessage =
-              (field
-                ? ` in field ${colors.green(field)}`
-                : ` in property ${colors.green(property)}`);
-
-            const findFnMessage =
-              (findFnKey.startsWith('_')
-                ? ``
-                : ` (${colors.green('find.' + findFnKey)})`);
-
-            const errorMessage =
-              (Array.isArray(value)
-                ? `Reference errors` + fieldPropertyMessage + findFnMessage
-                : `Reference error` + fieldPropertyMessage + findFnMessage);
-
-            let newPropertyValue = value;
-
-            determineNewPropertyValue: {
-              // TODO: The special-casing for artTag is obviously a bit janky.
-              // It would be nice if this could be moved to processDocument ala
-              // fieldCombinationErrors, but art tags are only an error if the
-              // thing doesn't have an artwork - which can't be determined from
-              // the track document on its own, thanks to inheriting contribs
-              // from the album.
-              if (findFnKey === '_artTag') {
-                let hasCoverArtwork =
-                  !empty(CacheableObject.getUpdateValue(thing, 'coverArtistContribs'));
-
-                if (thing.constructor === thingConstructors.Track) {
-                  if (thing.album) {
-                    hasCoverArtwork ||=
-                      !empty(CacheableObject.getUpdateValue(thing.album, 'trackCoverArtistContribs'));
-                  }
-
-                  if (thing.disableUniqueCoverArt) {
-                    hasCoverArtwork = false;
-                  }
-                }
-
-                if (!hasCoverArtwork) {
-                  nest({message: errorMessage}, ({push}) => {
-                    push(new TypeError(`No cover artwork, so this shouldn't have art tags specified`));
-                  });
-
-                  newPropertyValue = [];
-                  break determineNewPropertyValue;
-                }
-              }
-
-              if (findFnKey === '_commentary') {
-                filter(
-                  value, {message: errorMessage},
-                  decorateErrorWithIndex(refs =>
-                    (refs.length === 1
-                      ? suppress(findFn)(refs[0])
-                      : filterAggregate(
-                          refs, {message: `Errors in entry's artist references`},
-                          decorateErrorWithIndex(suppress(findFn)))
-                            .aggregate
-                            .close())));
-
-                // Commentary doesn't write a property value, so no need to set
-                // anything on `newPropertyValue`.
-                break determineNewPropertyValue;
-              }
-
-              if (Array.isArray(value)) {
-                newPropertyValue = filter(
-                  value, {message: errorMessage},
-                  decorateErrorWithIndex(suppress(findFn)));
-                break determineNewPropertyValue;
-              }
-
-              nest({message: errorMessage},
-                suppress(({call}) => {
-                  try {
-                    call(findFn, value);
-                  } catch (error) {
-                    newPropertyValue = null;
-                    throw error;
-                  }
-                }));
-            }
-
-            if (writeProperty) {
-              thing[property] = newPropertyValue;
-            }
-          }
-        });
-      }
-    });
-  }
-
-  return aggregate;
-}
-
 // Utility function for loading all wiki data from the provided YAML data
 // directory (e.g. the root of the hsmusic-data repository). This doesn't
 // provide much in the way of customization; it's meant to be used more as
@@ -1401,8 +1010,11 @@ export function filterReferenceErrors(wikiData) {
 // where reporting info about data loading isn't as relevant as during the
 // main wiki build process.
 export async function quickLoadAllFromYAML(dataPath, {
+  bindFind,
+  getAllFindSpecs,
+
   showAggregate: customShowAggregate = showAggregate,
-} = {}) {
+}) {
   const showAggregate = customShowAggregate;
 
   let wikiData;
@@ -1424,7 +1036,7 @@ export async function quickLoadAllFromYAML(dataPath, {
   linkWikiDataArrays(wikiData);
 
   try {
-    reportDuplicateDirectories(wikiData).close();
+    reportDuplicateDirectories(wikiData, {getAllFindSpecs}).close();
     logInfo`No duplicate directories found. (complete data)`;
   } catch (error) {
     showAggregate(error);
@@ -1432,7 +1044,7 @@ export async function quickLoadAllFromYAML(dataPath, {
   }
 
   try {
-    filterReferenceErrors(wikiData).close();
+    filterReferenceErrors(wikiData, {bindFind}).close();
     logInfo`No reference errors found. (complete data)`;
   } catch (error) {
     showAggregate(error);
diff --git a/src/upd8.js b/src/upd8.js
index af3d107..04c0ce2 100755
--- a/src/upd8.js
+++ b/src/upd8.js
@@ -40,12 +40,17 @@ import wrap from 'word-wrap';
 
 import CacheableObject from '#cacheable-object';
 import {displayCompositeCacheAnalysis} from '#composite';
+import {filterReferenceErrors, reportDuplicateDirectories}
+  from '#data-checks';
+import {bindFind, getAllFindSpecs} from '#find';
 import {processLanguageFile, watchLanguageFile, internalDefaultStringsFile}
   from '#language';
 import {isMain, traverse} from '#node-utils';
 import {empty, showAggregate, withEntries} from '#sugar';
 import {generateURLs, urlSpec} from '#urls';
 import {sortByName} from '#wiki-data';
+import {linkWikiDataArrays, loadAndProcessDataDocuments, sortWikiDataArrays}
+  from '#yaml';
 
 import {
   colors,
@@ -67,14 +72,6 @@ import genThumbs, {
   verifyImagePaths,
 } from '#thumbs';
 
-import {
-  filterReferenceErrors,
-  linkWikiDataArrays,
-  loadAndProcessDataDocuments,
-  reportDuplicateDirectories,
-  sortWikiDataArrays,
-} from '#yaml';
-
 import FileSizePreloader from './file-size-preloader.js';
 import {listingSpec, listingTargetSpec} from './listing-spec.js';
 import * as buildModes from './write/build-modes/index.js';
@@ -1118,7 +1115,7 @@ async function main() {
   });
 
   try {
-    reportDuplicateDirectories(wikiData);
+    reportDuplicateDirectories(wikiData, {getAllFindSpecs});
     logInfo`No duplicate directories found - nice!`;
 
     Object.assign(stepStatusSummary.reportDuplicateDirectories, {
@@ -1151,7 +1148,8 @@ async function main() {
       timeStart: Date.now(),
     });
 
-    const filterReferenceErrorsAggregate = filterReferenceErrors(wikiData);
+    const filterReferenceErrorsAggregate =
+      filterReferenceErrors(wikiData, {bindFind});
 
     try {
       filterReferenceErrorsAggregate.close();