From 87988954ad7314bee59932b0e5ef3474936ed33e Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Mon, 20 Nov 2023 13:59:13 -0400 Subject: data: update and revamp isCommentary validator --- src/data/things/validators.js | 59 +++++++++++++++++++++++++++++++++++-------- src/util/wiki-data.js | 14 +++++++--- 2 files changed, 60 insertions(+), 13 deletions(-) diff --git a/src/data/things/validators.js b/src/data/things/validators.js index 2893e7fd..569a7b34 100644 --- a/src/data/things/validators.js +++ b/src/data/things/validators.js @@ -5,7 +5,8 @@ import printable_characters from 'printable-characters'; const {strlen} = printable_characters; import {colors, ENABLE_COLOR} from '#cli'; -import {empty, typeAppearance, withAggregate} from '#sugar'; +import {cut, empty, typeAppearance, withAggregate} from '#sugar'; +import {commentaryRegex} from '#wiki-data'; function inspect(value) { return nodeInspect(value, {colors: ENABLE_COLOR}); @@ -248,18 +249,56 @@ export function isColor(color) { throw new TypeError(`Unknown color format`); } -export function isCommentary(commentary) { - isString(commentary); +export function isCommentary(commentaryText) { + isString(commentaryText); - const [firstLine] = commentary.match(/.*/); - if (!firstLine.replace(/<\/b>/g, '').includes(':')) { - throw new TypeError(`Missing commentary citation: "${ - firstLine.length > 40 - ? firstLine.slice(0, 40) + '...' 
- : firstLine - }"`); + const rawMatches = + Array.from(commentaryText.matchAll(commentaryRegex)); + + if (empty(rawMatches)) { + throw new TypeError(`Expected at least one commentary heading`); } + const niceMatches = + rawMatches.map(match => ({ + position: match.index, + length: match[0].length, + })); + + validateArrayItems(({position, length}, index) => { + if (index === 0 && position > 0) { + throw new TypeError(`Expected first commentary heading to be at top`); + } + + const ownInput = commentaryText.slice(position, position + length); + const restOfInput = commentaryText.slice(position + length); + const nextLineBreak = restOfInput.indexOf('\n'); + const upToNextLineBreak = restOfInput.slice(0, nextLineBreak); + + if (/\S/.test(upToNextLineBreak)) { + throw new TypeError( + `Expected commentary heading to occupy entire line, got extra text:\n` + + `${colors.green(`"${cut(ownInput, 40)}"`)} (<- heading)\n` + + `(extra on same line ->) ${colors.red(`"${cut(upToNextLineBreak, 30)}"`)}\n` + + `(Check for missing "|-" in YAML, or a misshapen annotation)`); + } + + const nextHeading = + (index === niceMatches.length - 1 + ? commentaryText.length + : niceMatches[index + 1].position); + + const upToNextHeading = + commentaryText.slice(position + length, nextHeading); + + if (!/\S/.test(upToNextHeading)) { + throw new TypeError( + `Expected commentary entry to have body text, only got a heading`); + } + + return true; + })(niceMatches); + return true; } diff --git a/src/util/wiki-data.js b/src/util/wiki-data.js index 5e3182a9..b5813c7a 100644 --- a/src/util/wiki-data.js +++ b/src/util/wiki-data.js @@ -636,8 +636,8 @@ export function sortFlashesChronologically(data, { // // where capturing group "annotation" can be any text at all, except that the // last entry (past a comma or the only content within parentheses), if parsed -// as a date, is the capturing group "date". "Parsing as a date" means one of -// these formats: +// as a date, is the capturing group "date". 
"Parsing as a date" means matching +// one of these formats: // // * "25 December 2019" - one or two number digits, followed by any text, // followed by four number digits @@ -646,6 +646,14 @@ export function sortFlashesChronologically(data, { // * "12/25/2019" etc - three sets of one to four number digits, separated // by slashes or dashes (only valid orders are MM/DD/YYYY and YYYY/MM/DD) // +// Note that the annotation and date are always wrapped by one opening and one +// closing parentheses. The whole heading does NOT need to match the entire +// line it occupies (though it does always start at the first position on that +// line), and if there is more than one closing parenthesis on the line, the +// annotation will always cut off only at the last parenthesis, or a comma +// preceding a date and then the last parenthesis. This is to ensure that +// parentheses can be part of the actual annotation content. +// // Capturing group "artistReference" is all the characters between <i> and </i> // (apart from the pipe and "artistDisplayText" text, if present), and is either // the name of an artist or an "artist:directory"-style reference. @@ -654,7 +662,7 @@ export function sortFlashesChronologically(data, { // out of the original string based on the indices matched using this. // export const commentaryRegex = - /^<i>(?<artistReference>.+?)(?:\|(?<artistDisplayText>.+))?:<\/i>(?: \((?<annotation>(?:.*?(?=,|\)$))*?)(?:,? ?(?<date>[a-zA-Z]+ [0-9]{1,2}, [0-9]{4,4}|[0-9]{1,2} [^,]*[0-9]{4,4}|[0-9]{1,4}[-/][0-9]{1,4}[-/][0-9]{1,4}))?\))?$/gm; + /^<i>(?<artistReference>.+?)(?:\|(?<artistDisplayText>.+))?:<\/i>(?: \((?<annotation>(?:.*?(?=,|\)[^)]*$))*?)(?:,? ?(?<date>[a-zA-Z]+ [0-9]{1,2}, [0-9]{4,4}|[0-9]{1,2} [^,]*[0-9]{4,4}|[0-9]{1,4}[-/][0-9]{1,4}[-/][0-9]{1,4}))?\))?/gm; export function filterAlbumsByCommentary(albums) { return albums -- cgit 1.3.0-6-gf8a5