From 87988954ad7314bee59932b0e5ef3474936ed33e Mon Sep 17 00:00:00 2001
From: "(quasar) nebula" <qznebula@protonmail.com>
Date: Mon, 20 Nov 2023 13:59:13 -0400
Subject: data: update and revamp isCommentary validator

---
 src/data/things/validators.js | 59 +++++++++++++++++++++++++++++++++++--------
 src/util/wiki-data.js         | 14 +++++++---
 2 files changed, 60 insertions(+), 13 deletions(-)
diff --git a/src/data/things/validators.js b/src/data/things/validators.js
index 2893e7fd..569a7b34 100644
--- a/src/data/things/validators.js
+++ b/src/data/things/validators.js
@@ -5,7 +5,8 @@ import printable_characters from 'printable-characters';
 const {strlen} = printable_characters;
 
 import {colors, ENABLE_COLOR} from '#cli';
-import {empty, typeAppearance, withAggregate} from '#sugar';
+import {cut, empty, typeAppearance, withAggregate} from '#sugar';
+import {commentaryRegex} from '#wiki-data';
 
 function inspect(value) {
   return nodeInspect(value, {colors: ENABLE_COLOR});
@@ -248,18 +249,56 @@ export function isColor(color) {
   throw new TypeError(`Unknown color format`);
 }
 
-export function isCommentary(commentary) {
-  isString(commentary);
+export function isCommentary(commentaryText) {
+  isString(commentaryText);
 
-  const [firstLine] = commentary.match(/.*/);
-  if (!firstLine.replace(/<\/b>/g, '').includes(':</i>')) {
-    throw new TypeError(`Missing commentary citation: "${
-      firstLine.length > 40
-        ? firstLine.slice(0, 40) + '...'
-        : firstLine
-    }"`);
+  const rawMatches =
+    Array.from(commentaryText.matchAll(commentaryRegex));
+
+  if (empty(rawMatches)) {
+    throw new TypeError(`Expected at least one commentary heading`);
   }
 
+  const niceMatches =
+    rawMatches.map(match => ({
+      position: match.index,
+      length: match[0].length,
+    }));
+
+  validateArrayItems(({position, length}, index) => {
+    if (index === 0 && position > 0) {
+      throw new TypeError(`Expected first commentary heading to be at top`);
+    }
+
+    const ownInput = commentaryText.slice(position, position + length);
+    const restOfInput = commentaryText.slice(position + length);
+    const nextLineBreak = restOfInput.indexOf('\n');
+    const upToNextLineBreak = restOfInput.slice(0, nextLineBreak);
+
+    if (/\S/.test(upToNextLineBreak)) {
+      throw new TypeError(
+        `Expected commentary heading to occupy entire line, got extra text:\n` +
+        `${colors.green(`"${cut(ownInput, 40)}"`)} (<- heading)\n` +
+        `(extra on same line ->) ${colors.red(`"${cut(upToNextLineBreak, 30)}"`)}\n` +
+        `(Check for missing "|-" in YAML, or a misshapen annotation)`);
+    }
+
+    const nextHeading =
+      (index === niceMatches.length - 1
+        ? commentaryText.length
+        : niceMatches[index + 1].position);
+
+    const upToNextHeading =
+      commentaryText.slice(position + length, nextHeading);
+
+    if (!/\S/.test(upToNextHeading)) {
+      throw new TypeError(
+        `Expected commentary entry to have body text, only got a heading`);
+    }
+
+    return true;
+  })(niceMatches);
+
   return true;
 }
 
diff --git a/src/util/wiki-data.js b/src/util/wiki-data.js
index 5e3182a9..b5813c7a 100644
--- a/src/util/wiki-data.js
+++ b/src/util/wiki-data.js
@@ -636,8 +636,8 @@ export function sortFlashesChronologically(data, {
 //
 // where capturing group "annotation" can be any text at all, except that the
 // last entry (past a comma or the only content within parentheses), if parsed
-// as a date, is the capturing group "date". "Parsing as a date" means one of
-// these formats:
+// as a date, is the capturing group "date". "Parsing as a date" means matching
+// one of these formats:
 //
 //   * "25 December 2019" - one or two number digits, followed by any text,
 //     followed by four number digits
@@ -646,6 +646,14 @@ export function sortFlashesChronologically(data, {
 //   * "12/25/2019" etc - three sets of one to four number digits, separated
 //     by slashes or dashes (only valid orders are MM/DD/YYYY and YYYY/MM/DD)
 //
+// Note that the annotation and date are always wrapped by one opening and one
+// closing parentheses. The whole heading does NOT need to match the entire
+// line it occupies (though it does always start at the first position on that
+// line), and if there is more than one closing parenthesis on the line, the
+// annotation will always cut off only at the last parenthesis, or a comma
+// preceding a date and then the last parenthesis. This is to ensure that
+// parentheses can be part of the actual annotation content.
+//
 // Capturing group "artistReference" is all the characters between <i> and </i>
 // (apart from the pipe and "artistDisplayText" text, if present), and is either
 // the name of an artist or an "artist:directory"-style reference.
@@ -654,7 +662,7 @@ export function sortFlashesChronologically(data, {
 // out of the original string based on the indices matched using this.
 //
 export const commentaryRegex =
-  /^<i>(?<artistReferences>.+?)(?:\|(?<artistDisplayText>.+))?:<\/i>(?: \((?<annotation>(?:.*?(?=,|\)$))*?)(?:,? ?(?<date>[a-zA-Z]+ [0-9]{1,2}, [0-9]{4,4}|[0-9]{1,2} [^,]*[0-9]{4,4}|[0-9]{1,4}[-/][0-9]{1,4}[-/][0-9]{1,4}))?\))?$/gm;
+  /^<i>(?<artistReferences>.+?)(?:\|(?<artistDisplayText>.+))?:<\/i>(?: \((?<annotation>(?:.*?(?=,|\)[^)]*$))*?)(?:,? ?(?<date>[a-zA-Z]+ [0-9]{1,2}, [0-9]{4,4}|[0-9]{1,2} [^,]*[0-9]{4,4}|[0-9]{1,4}[-/][0-9]{1,4}[-/][0-9]{1,4}))?\))?/gm;
 
 export function filterAlbumsByCommentary(albums) {
   return albums
-- 
cgit 1.3.0-6-gf8a5