From 362dc0619b93d74ad34df1bfbfd9ebc632fa5156 Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Tue, 14 Nov 2023 22:49:51 -0400 Subject: data, yaml: catch commentary artist ref errors --- src/util/wiki-data.js | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'src/util/wiki-data.js') diff --git a/src/util/wiki-data.js b/src/util/wiki-data.js index 0790ae91..5ab01225 100644 --- a/src/util/wiki-data.js +++ b/src/util/wiki-data.js @@ -629,6 +629,31 @@ export function sortFlashesChronologically(data, { // Specific data utilities +// Matches heading details from commentary data in roughly the format: +// +// <i>artistReference:</i> (annotation, date) +// +// where capturing group "annotation" can be any text at all, except that the +// last entry (past a comma or the only content within parentheses), if parsed +// as a date, is the capturing group "date". "Parsing as a date" means one of +// these formats: +// +// * "25 December 2019" - one or two number digits, followed by any text, +// followed by four number digits +// * "12/25/2019" - one or two number digits, a slash, one or two number +// digits, a slash, and two to four number digits +// +// The artist reference can optionally be boldface (in <b>), which will be +// captured as non-null in "boldfaceArtist". Otherwise it is all the characters +// between <i> and </i> and is captured in "artistReference" and is either the +// name of an artist or an "artist:directory"-style reference. +// +// This regular expression *doesn't* match bodies, which will need to be parsed +// out of the original string based on the indices matched using this. +// +export const commentaryRegex = + /^<i>(?<boldfaceArtist><b>)?(?<artistReference>.+):(?:<\/b>)?<\/i>(?: \((?<annotation>(?:.*?(?=[,)]))*?)(?:,? 
?(?<date>[0-9]{1,2} [^,]*[0-9]{4,4}|[0-9]{1,2}\/[0-9]{1,2}\/[0-9]{2,4}))?\))?/gm; + export function filterAlbumsByCommentary(albums) { return albums .filter((album) => [album, ...album.tracks].some((x) => x.commentary)); -- cgit 1.3.0-6-gf8a5 From f754a8d9187e435a761db31b5053aa2e7ba22e13 Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Tue, 14 Nov 2023 23:36:37 -0400 Subject: data, test: boldfaceArtist -> artistDisplayText --- src/util/wiki-data.js | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src/util/wiki-data.js') diff --git a/src/util/wiki-data.js b/src/util/wiki-data.js index 5ab01225..75a141d3 100644 --- a/src/util/wiki-data.js +++ b/src/util/wiki-data.js @@ -629,9 +629,10 @@ export function sortFlashesChronologically(data, { // Specific data utilities -// Matches heading details from commentary data in roughly the format: +// Matches heading details from commentary data in roughly the formats: // // <i>artistReference:</i> (annotation, date) +// <i>artistReference|artistDisplayText:</i> (annotation, date) // // where capturing group "annotation" can be any text at all, except that the // last entry (past a comma or the only content within parentheses), if parsed @@ -643,16 +644,15 @@ export function sortFlashesChronologically(data, { // * "12/25/2019" - one or two number digits, a slash, one or two number // digits, a slash, and two to four number digits // -// The artist reference can optionally be boldface (in <b>), which will be -// captured as non-null in "boldfaceArtist". Otherwise it is all the characters -// between <i> and </i> and is captured in "artistReference" and is either the -// name of an artist or an "artist:directory"-style reference. +// Capturing group "artistReference" is all the characters between <i> and </i> +// (apart from the pipe and "artistDisplayText" text, if present), and is either +// the name of an artist or an "artist:directory"-style reference. 
// // This regular expression *doesn't* match bodies, which will need to be parsed // out of the original string based on the indices matched using this. // export const commentaryRegex = - /^<i>(?<boldfaceArtist><b>)?(?<artistReference>.+):(?:<\/b>)?<\/i>(?: \((?<annotation>(?:.*?(?=[,)]))*?)(?:,? ?(?<date>[0-9]{1,2} [^,]*[0-9]{4,4}|[0-9]{1,2}\/[0-9]{1,2}\/[0-9]{2,4}))?\))?/gm; + /^<i>(?<artistReference>.+?)(?:\|(?<artistDisplayText>.+))?:<\/i>(?: \((?<annotation>(?:.*?(?=[,)]))*?)(?:,? ?(?<date>[a-zA-Z]+ [0-9]{1,2}, [0-9]{4,4}|[0-9]{1,2} [^,]*[0-9]{4,4}|[0-9]{1,2}\/[0-9]{1,2}\/[0-9]{2,4}))?\))?/gm; export function filterAlbumsByCommentary(albums) { return albums -- cgit 1.3.0-6-gf8a5 From a34b8d027866fbe858a4d2ff3543bc84c9d5983a Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Fri, 17 Nov 2023 06:53:34 -0400 Subject: data, yaml, content: support multiple artists per commentary entry --- src/util/wiki-data.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/util/wiki-data.js') diff --git a/src/util/wiki-data.js b/src/util/wiki-data.js index 75a141d3..4c7ec043 100644 --- a/src/util/wiki-data.js +++ b/src/util/wiki-data.js @@ -652,7 +652,7 @@ export function sortFlashesChronologically(data, { // out of the original string based on the indices matched using this. // export const commentaryRegex = - /^<i>(?<artistReference>.+?)(?:\|(?<artistDisplayText>.+))?:<\/i>(?: \((?<annotation>(?:.*?(?=[,)]))*?)(?:,? ?(?<date>[a-zA-Z]+ [0-9]{1,2}, [0-9]{4,4}|[0-9]{1,2} [^,]*[0-9]{4,4}|[0-9]{1,2}\/[0-9]{1,2}\/[0-9]{2,4}))?\))?/gm; + /^<i>(?<artistReferences>.+?)(?:\|(?<artistDisplayText>.+))?:<\/i>(?: \((?<annotation>(?:.*?(?=[,)]))*?)(?:,? 
?(?<date>[a-zA-Z]+ [0-9]{1,2}, [0-9]{4,4}|[0-9]{1,2} [^,]*[0-9]{4,4}|[0-9]{1,2}\/[0-9]{1,2}\/[0-9]{2,4}))?\))?/gm; export function filterAlbumsByCommentary(albums) { return albums -- cgit 1.3.0-6-gf8a5 From 4b7da4c1f8c359e5c82c4cc5e0cfb78f8204850f Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Sat, 18 Nov 2023 19:21:31 -0400 Subject: data: parse commentary heading contents to end of line --- src/util/wiki-data.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/util/wiki-data.js') diff --git a/src/util/wiki-data.js b/src/util/wiki-data.js index 4c7ec043..8a6d4345 100644 --- a/src/util/wiki-data.js +++ b/src/util/wiki-data.js @@ -652,7 +652,7 @@ export function sortFlashesChronologically(data, { // out of the original string based on the indices matched using this. // export const commentaryRegex = - /^<i>(?<artistReferences>.+?)(?:\|(?<artistDisplayText>.+))?:<\/i>(?: \((?<annotation>(?:.*?(?=[,)]))*?)(?:,? ?(?<date>[a-zA-Z]+ [0-9]{1,2}, [0-9]{4,4}|[0-9]{1,2} [^,]*[0-9]{4,4}|[0-9]{1,2}\/[0-9]{1,2}\/[0-9]{2,4}))?\))?/gm; + /^<i>(?<artistReferences>.+?)(?:\|(?<artistDisplayText>.+))?:<\/i>(?: \((?<annotation>(?:.*?(?=,|\)$))*?)(?:,? 
?(?<date>[a-zA-Z]+ [0-9]{1,2}, [0-9]{4,4}|[0-9]{1,2} [^,]*[0-9]{4,4}|[0-9]{1,2}\/[0-9]{1,2}\/[0-9]{2,4}))?\))?$/gm; export function filterAlbumsByCommentary(albums) { return albums -- cgit 1.3.0-6-gf8a5 From 522c982bf0b5a0bd39512eb56a9d0d8d8feea44e Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Sat, 18 Nov 2023 19:30:27 -0400 Subject: data: looser commentary date parsing + clearer regex explanation --- src/util/wiki-data.js | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'src/util/wiki-data.js') diff --git a/src/util/wiki-data.js b/src/util/wiki-data.js index 8a6d4345..09b3623e 100644 --- a/src/util/wiki-data.js +++ b/src/util/wiki-data.js @@ -641,8 +641,10 @@ export function sortFlashesChronologically(data, { // // * "25 December 2019" - one or two number digits, followed by any text, // followed by four number digits -// * "12/25/2019" - one or two number digits, a slash, one or two number -// digits, a slash, and two to four number digits +// * "December 25, 2019" - one all-letters word, a space, one or two number +// digits, a comma, and four number digits +// * "12/25/2019" etc - three sets of one to four number digits, separated +// by slashes (only valid formats are MM/DD/YYYY and YYYY/MM/DD) // // Capturing group "artistReference" is all the characters between <i> and </i> // (apart from the pipe and "artistDisplayText" text, if present), and is either @@ -652,7 +654,7 @@ export function sortFlashesChronologically(data, { // out of the original string based on the indices matched using this. // export const commentaryRegex = - /^<i>(?<artistReferences>.+?)(?:\|(?<artistDisplayText>.+))?:<\/i>(?: \((?<annotation>(?:.*?(?=,|\)$))*?)(?:,? ?(?<date>[a-zA-Z]+ [0-9]{1,2}, [0-9]{4,4}|[0-9]{1,2} [^,]*[0-9]{4,4}|[0-9]{1,2}\/[0-9]{1,2}\/[0-9]{2,4}))?\))?$/gm; + /^<i>(?<artistReferences>.+?)(?:\|(?<artistDisplayText>.+))?:<\/i>(?: \((?<annotation>(?:.*?(?=,|\)$))*?)(?:,? 
?(?<date>[a-zA-Z]+ [0-9]{1,2}, [0-9]{4,4}|[0-9]{1,2} [^,]*[0-9]{4,4}|[0-9]{1,4}\/[0-9]{1,4}\/[0-9]{1,4}))?\))?$/gm; export function filterAlbumsByCommentary(albums) { return albums -- cgit 1.3.0-6-gf8a5 From e874f7e4d0df5fed25d4c359c8cb403e67061e59 Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Sat, 18 Nov 2023 19:33:38 -0400 Subject: data: support dash-style short dates in commentary dates --- src/util/wiki-data.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/util/wiki-data.js') diff --git a/src/util/wiki-data.js b/src/util/wiki-data.js index 09b3623e..5e3182a9 100644 --- a/src/util/wiki-data.js +++ b/src/util/wiki-data.js @@ -644,7 +644,7 @@ export function sortFlashesChronologically(data, { // * "December 25, 2019" - one all-letters word, a space, one or two number // digits, a comma, and four number digits // * "12/25/2019" etc - three sets of one to four number digits, separated -// by slashes (only valid formats are MM/DD/YYYY and YYYY/MM/DD) +// by slashes or dashes (only valid orders are MM/DD/YYYY and YYYY/MM/DD) // // Capturing group "artistReference" is all the characters between <i> and </i> // (apart from the pipe and "artistDisplayText" text, if present), and is either @@ -654,7 +654,7 @@ export function sortFlashesChronologically(data, { // out of the original string based on the indices matched using this. // export const commentaryRegex = - /^<i>(?<artistReferences>.+?)(?:\|(?<artistDisplayText>.+))?:<\/i>(?: \((?<annotation>(?:.*?(?=,|\)$))*?)(?:,? ?(?<date>[a-zA-Z]+ [0-9]{1,2}, [0-9]{4,4}|[0-9]{1,2} [^,]*[0-9]{4,4}|[0-9]{1,4}\/[0-9]{1,4}\/[0-9]{1,4}))?\))?$/gm; + /^<i>(?<artistReferences>.+?)(?:\|(?<artistDisplayText>.+))?:<\/i>(?: \((?<annotation>(?:.*?(?=,|\)$))*?)(?:,? 
?(?<date>[a-zA-Z]+ [0-9]{1,2}, [0-9]{4,4}|[0-9]{1,2} [^,]*[0-9]{4,4}|[0-9]{1,4}[-/][0-9]{1,4}[-/][0-9]{1,4}))?\))?$/gm; export function filterAlbumsByCommentary(albums) { return albums -- cgit 1.3.0-6-gf8a5 From 87988954ad7314bee59932b0e5ef3474936ed33e Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Mon, 20 Nov 2023 13:59:13 -0400 Subject: data: update and revamp isCommentary validator --- src/util/wiki-data.js | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'src/util/wiki-data.js') diff --git a/src/util/wiki-data.js b/src/util/wiki-data.js index 5e3182a9..b5813c7a 100644 --- a/src/util/wiki-data.js +++ b/src/util/wiki-data.js @@ -636,8 +636,8 @@ export function sortFlashesChronologically(data, { // // where capturing group "annotation" can be any text at all, except that the // last entry (past a comma or the only content within parentheses), if parsed -// as a date, is the capturing group "date". "Parsing as a date" means one of -// these formats: +// as a date, is the capturing group "date". "Parsing as a date" means matching +// one of these formats: // // * "25 December 2019" - one or two number digits, followed by any text, // followed by four number digits @@ -646,6 +646,14 @@ export function sortFlashesChronologically(data, { // * "12/25/2019" etc - three sets of one to four number digits, separated // by slashes or dashes (only valid orders are MM/DD/YYYY and YYYY/MM/DD) // +// Note that the annotation and date are always wrapped by one opening and one +// closing parentheses. The whole heading does NOT need to match the entire +// line it occupies (though it does always start at the first position on that +// line), and if there is more than one closing parenthesis on the line, the +// annotation will always cut off only at the last parenthesis, or a comma +// preceding a date and then the last parenthesis. This is to ensure that +// parentheses can be part of the actual annotation content. 
+// // Capturing group "artistReference" is all the characters between <i> and </i> // (apart from the pipe and "artistDisplayText" text, if present), and is either // the name of an artist or an "artist:directory"-style reference. @@ -654,7 +662,7 @@ export function sortFlashesChronologically(data, { // out of the original string based on the indices matched using this. // export const commentaryRegex = - /^<i>(?<artistReferences>.+?)(?:\|(?<artistDisplayText>.+))?:<\/i>(?: \((?<annotation>(?:.*?(?=,|\)$))*?)(?:,? ?(?<date>[a-zA-Z]+ [0-9]{1,2}, [0-9]{4,4}|[0-9]{1,2} [^,]*[0-9]{4,4}|[0-9]{1,4}[-/][0-9]{1,4}[-/][0-9]{1,4}))?\))?$/gm; + /^<i>(?<artistReferences>.+?)(?:\|(?<artistDisplayText>.+))?:<\/i>(?: \((?<annotation>(?:.*?(?=,|\)[^)]*$))*?)(?:,? ?(?<date>[a-zA-Z]+ [0-9]{1,2}, [0-9]{4,4}|[0-9]{1,2} [^,]*[0-9]{4,4}|[0-9]{1,4}[-/][0-9]{1,4}[-/][0-9]{1,4}))?\))?/gm; export function filterAlbumsByCommentary(albums) { return albums -- cgit 1.3.0-6-gf8a5