« get me outta code hell

data, yaml: catch commentary artist ref errors - hsmusic-wiki - HSMusic - static wiki software cataloguing collaborative creation
about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author (quasar) nebula <qznebula@protonmail.com> 2023-11-14 22:49:51 -0400
committer (quasar) nebula <qznebula@protonmail.com> 2023-11-14 23:32:24 -0400
commit 362dc0619b93d74ad34df1bfbfd9ebc632fa5156 (patch)
tree 79331efb4387ee03aae6ad6da54bd8e5896189b4 /src
parent 09a4af31a3f8207dfe114926b0dbf27eeddf7de9 (diff)
data, yaml: catch commentary artist ref errors
Diffstat (limited to 'src')
-rw-r--r-- src/data/composite/wiki-data/withParsedCommentaryEntries.js 23
-rw-r--r-- src/data/yaml.js 55
-rw-r--r-- src/util/wiki-data.js 25
3 files changed, 67 insertions, 36 deletions
diff --git a/src/data/composite/wiki-data/withParsedCommentaryEntries.js b/src/data/composite/wiki-data/withParsedCommentaryEntries.js
index 5bd72dc9..9e33cdac 100644
--- a/src/data/composite/wiki-data/withParsedCommentaryEntries.js
+++ b/src/data/composite/wiki-data/withParsedCommentaryEntries.js
@@ -2,33 +2,12 @@ import {input, templateCompositeFrom} from '#composite';
 import find from '#find';
 import {stitchArrays} from '#sugar';
 import {isCommentary} from '#validators';
+import {commentaryRegex} from '#wiki-data';
 
 import {fillMissingListItems, withPropertiesFromList} from '#composite/data';
 
 import withResolvedReferenceList from './withResolvedReferenceList.js';
 
-// Matches in roughly the format:
-//
-//    <i>artistReference:</i> (annotation, date)
-//
-// where capturing group "annotation" can be any text at all, except that the
-// last entry (past a comma or the only content within parentheses), if parsed
-// as a date, is the capturing group "date". "Parsing as a date" means one of
-// these formats:
-//
-//   * "25 December 2019" - one or two number digits, followed by any text,
-//     followed by four number digits
-//   * "12/25/2019" - one or two number digits, a slash, one or two number
-//     digits, a slash, and two to four number digits
-//
-// The artist reference can optionally be boldface (in <b></b>), which will be
-// captured as non-null in "boldfaceArtist". Otherwise it is all the characters
-// between <i> and </i> and is captured in "artistReference" and is either the
-// name of an artist or an "artist:directory"-style reference.
-//
-export const commentaryRegex =
-  /^<i>(?<boldfaceArtist><b>)?(?<artistReference>.+):(?:<\/b>)?<\/i>(?: \((?<annotation>(?:.*?(?=[,)]))*?)(?:,? ?(?<date>[0-9]{1,2} [^,]*[0-9]{4,4}|[0-9]{1,2}\/[0-9]{1,2}\/[0-9]{2,4}))?\))?/gm;
-
 export default templateCompositeFrom({
   annotation: `withParsedCommentaryEntries`,
 
diff --git a/src/data/yaml.js b/src/data/yaml.js
index 986f25d1..843e70b3 100644
--- a/src/data/yaml.js
+++ b/src/data/yaml.js
@@ -28,6 +28,7 @@ import {
 } from '#sugar';
 
 import {
+  commentaryRegex,
   sortAlbumsTracksChronologically,
   sortAlphabetically,
   sortChronologically,
@@ -1616,6 +1617,7 @@ export function filterReferenceErrors(wikiData) {
       bannerArtistContribs: '_contrib',
       groups: 'group',
       artTags: 'artTag',
+      commentary: '_commentary',
     }],
 
     ['trackData', processTrackDocument, {
@@ -1626,6 +1628,7 @@ export function filterReferenceErrors(wikiData) {
       sampledTracks: '_trackNotRerelease',
       artTags: 'artTag',
       originalReleaseTrack: '_trackNotRerelease',
+      commentary: '_commentary',
     }],
 
     ['groupCategoryData', processGroupCategoryDocument, {
@@ -1675,7 +1678,19 @@ export function filterReferenceErrors(wikiData) {
 
         nest({message: `Reference errors in ${inspect(thing)}`}, ({nest, push, filter}) => {
           for (const [property, findFnKey] of Object.entries(propSpec)) {
-            const value = CacheableObject.getUpdateValue(thing, property);
+            let value = CacheableObject.getUpdateValue(thing, property);
+            let writeProperty = true;
+
+            switch (findFnKey) {
+              case '_commentary':
+                if (value) {
+                  value =
+                    Array.from(value.matchAll(commentaryRegex))
+                      .map(({groups}) => groups.artistReference);
+                }
+                writeProperty = false;
+                break;
+            }
 
             if (value === undefined) {
               push(new TypeError(`Property ${colors.red(property)} isn't valid for ${colors.green(thing.constructor.name)}`));
@@ -1688,19 +1703,25 @@ export function filterReferenceErrors(wikiData) {
 
             let findFn;
 
+            const findArtistOrAlias = artistRef => {
+              const alias = find.artist(artistRef, wikiData.artistAliasData, {mode: 'quiet'});
+              if (alias) {
+                // No need to check if the original exists here. Aliases are automatically
+                // created from a field on the original, so the original certainly exists.
+                const original = alias.aliasedArtist;
+                throw new Error(`Reference ${colors.red(artistRef)} is to an alias, should be ${colors.green(original.name)}`);
+              }
+
+              return boundFind.artist(artistRef);
+            };
+
             switch (findFnKey) {
-              case '_contrib':
-                findFn = contribRef => {
-                  const alias = find.artist(contribRef.who, wikiData.artistAliasData, {mode: 'quiet'});
-                  if (alias) {
-                    // No need to check if the original exists here. Aliases are automatically
-                    // created from a field on the original, so the original certainly exists.
-                    const original = alias.aliasedArtist;
-                    throw new Error(`Reference ${colors.red(contribRef.who)} is to an alias, should be ${colors.green(original.name)}`);
-                  }
+              case '_commentary':
+                findFn = findArtistOrAlias;
+                break;
 
-                  return boundFind.artist(contribRef.who);
-                };
+              case '_contrib':
+                findFn = contribRef => findArtistOrAlias(contribRef.who);
                 break;
 
               case '_homepageSourceGroup':
@@ -1781,8 +1802,10 @@ export function filterReferenceErrors(wikiData) {
                 ? `Reference errors` + fieldPropertyMessage + findFnMessage
                 : `Reference error` + fieldPropertyMessage + findFnMessage);
 
+            let newPropertyValue = value;
+
             if (Array.isArray(value)) {
-              thing[property] = filter(
+              newPropertyValue = filter(
                 value,
                 decorateErrorWithIndex(suppress(findFn)),
                 {message: errorMessage});
@@ -1792,11 +1815,15 @@ export function filterReferenceErrors(wikiData) {
                   try {
                     call(findFn, value);
                   } catch (error) {
-                    thing[property] = null;
+                    newPropertyValue = null;
                     throw error;
                   }
                 }));
             }
+
+            if (writeProperty) {
+              thing[property] = newPropertyValue;
+            }
           }
         });
       }
diff --git a/src/util/wiki-data.js b/src/util/wiki-data.js
index 0790ae91..5ab01225 100644
--- a/src/util/wiki-data.js
+++ b/src/util/wiki-data.js
@@ -629,6 +629,31 @@ export function sortFlashesChronologically(data, {
 
 // Specific data utilities
 
+// Matches heading details from commentary data in roughly the format:
+//
+//    <i>artistReference:</i> (annotation, date)
+//
+// where capturing group "annotation" can be any text at all, except that the
+// last entry (past a comma or the only content within parentheses), if parsed
+// as a date, is the capturing group "date". "Parsing as a date" means one of
+// these formats:
+//
+//   * "25 December 2019" - one or two number digits, followed by any text,
+//     followed by four number digits
+//   * "12/25/2019" - one or two number digits, a slash, one or two number
+//     digits, a slash, and two to four number digits
+//
+// The artist reference can optionally be boldface (in <b></b>), which will be
+// captured as non-null in "boldfaceArtist". Otherwise it is all the characters
+// between <i> and </i> and is captured in "artistReference" and is either the
+// name of an artist or an "artist:directory"-style reference.
+//
+// This regular expression *doesn't* match bodies, which will need to be parsed
+// out of the original string based on the indices matched using this.
+//
+export const commentaryRegex =
+  /^<i>(?<boldfaceArtist><b>)?(?<artistReference>.+):(?:<\/b>)?<\/i>(?: \((?<annotation>(?:.*?(?=[,)]))*?)(?:,? ?(?<date>[0-9]{1,2} [^,]*[0-9]{4,4}|[0-9]{1,2}\/[0-9]{1,2}\/[0-9]{2,4}))?\))?/gm;
+
 export function filterAlbumsByCommentary(albums) {
   return albums
     .filter((album) => [album, ...album.tracks].some((x) => x.commentary));