« get me outta code hell

hsmusic-wiki - HSMusic - static wiki software cataloguing collaborative creation
about summary refs log tree commit diff
path: root/src/util/wiki-data.js
diff options
context:
space:
mode:
Diffstat (limited to 'src/util/wiki-data.js')
-rw-r--r--src/util/wiki-data.js35
1 files changed, 35 insertions, 0 deletions
diff --git a/src/util/wiki-data.js b/src/util/wiki-data.js
index 0790ae9..b5813c7 100644
--- a/src/util/wiki-data.js
+++ b/src/util/wiki-data.js
@@ -629,6 +629,41 @@ export function sortFlashesChronologically(data, {
 
 // Specific data utilities
 
+// Matches heading details from commentary data in roughly the formats:
+//
+//    <i>artistReference:</i> (annotation, date)
+//    <i>artistReference|artistDisplayText:</i> (annotation, date)
+//
+// where capturing group "annotation" can be any text at all, except that the
+// last entry (past a comma or the only content within parentheses), if parsed
+// as a date, is the capturing group "date". "Parsing as a date" means matching
+// one of these formats:
+//
+//   * "25 December 2019" - one or two number digits, followed by any text,
+//     followed by four number digits
+//   * "December 25, 2019" - one all-letters word, a space, one or two number
+//     digits, a comma, and four number digits
+//   * "12/25/2019" etc - three sets of one to four number digits, separated
+//     by slashes or dashes (only valid orders are MM/DD/YYYY and YYYY/MM/DD)
+//
+// Note that the annotation and date are always wrapped by one opening and one
+// closing parentheses. The whole heading does NOT need to match the entire
+// line it occupies (though it does always start at the first position on that
+// line), and if there is more than one closing parenthesis on the line, the
+// annotation will always cut off only at the last parenthesis, or a comma
+// preceding a date and then the last parenthesis. This is to ensure that
+// parentheses can be part of the actual annotation content.
+//
+// Capturing group "artistReference" is all the characters between <i> and </i>
+// (apart from the pipe and "artistDisplayText" text, if present), and is either
+// the name of an artist or an "artist:directory"-style reference.
+//
+// This regular expression *doesn't* match bodies, which will need to be parsed
+// out of the original string based on the indices matched using this.
+//
+export const commentaryRegex =
+  /^<i>(?<artistReferences>.+?)(?:\|(?<artistDisplayText>.+))?:<\/i>(?: \((?<annotation>(?:.*?(?=,|\)[^)]*$))*?)(?:,? ?(?<date>[a-zA-Z]+ [0-9]{1,2}, [0-9]{4,4}|[0-9]{1,2} [^,]*[0-9]{4,4}|[0-9]{1,4}[-/][0-9]{1,4}[-/][0-9]{1,4}))?\))?/gm;
+
 export function filterAlbumsByCommentary(albums) {
   return albums
     .filter((album) => [album, ...album.tracks].some((x) => x.commentary));