From 362dc0619b93d74ad34df1bfbfd9ebc632fa5156 Mon Sep 17 00:00:00 2001
From: "(quasar) nebula" <qznebula@protonmail.com>
Date: Tue, 14 Nov 2023 22:49:51 -0400
Subject: data, yaml: catch commentary artist ref errors

---
 src/util/wiki-data.js | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

(limited to 'src/util/wiki-data.js')
diff --git a/src/util/wiki-data.js b/src/util/wiki-data.js
index 0790ae9..5ab0122 100644
--- a/src/util/wiki-data.js
+++ b/src/util/wiki-data.js
@@ -629,6 +629,31 @@ export function sortFlashesChronologically(data, {
 
 // Specific data utilities
 
+// Matches heading details from commentary data in roughly the format:
+//
+//    <i>artistReference:</i> (annotation, date)
+//
+// The parenthetical is optional. Capturing group "annotation" can be any text
+// at all, except that the last entry (past a comma or the only content within
+// the parentheses), if it parses as a date, is instead the capturing group
+// "date". "Parsing as a date" means one of these formats:
+//
+//   * "25 December 2019" - one or two digits, a space, any text without
+//     commas, and then four digits
+//   * "12/25/2019" - one or two digits, a slash, one or two digits, a
+//     slash, and two to four digits
+//
+// The artist reference can optionally be boldface (in <b></b>), in which case
+// "boldfaceArtist" is captured (it is undefined otherwise). "artistReference"
+// is everything between the opening tag(s) and the colon before </i>, and is
+// either an artist's name or an "artist:directory"-style reference.
+//
+// This regular expression *doesn't* match bodies, which will need to be parsed
+// out of the original string based on the indices matched using this.
+//
+export const commentaryRegex =
+  /^<i>(?<boldfaceArtist><b>)?(?<artistReference>.+):(?:<\/b>)?<\/i>(?: \((?<annotation>(?:.*?(?=[,)]))*?)(?:,? ?(?<date>[0-9]{1,2} [^,]*[0-9]{4,4}|[0-9]{1,2}\/[0-9]{1,2}\/[0-9]{2,4}))?\))?/gm;
+
 export function filterAlbumsByCommentary(albums) {
   return albums
     .filter((album) => [album, ...album.tracks].some((x) => x.commentary));
-- 
cgit 1.3.0-6-gf8a5