5 files changed, 243 insertions, 304 deletions
diff --git a/src/common-util/search-shape.js b/src/common-util/search-shape.js
new file mode 100644
index 00000000..e0819ed6
--- /dev/null
+++ b/src/common-util/search-shape.js
@@ -0,0 +1,58 @@
+// Index structures shared by client and server, and relevant interfaces.
+// First and foremost, this is complemented by src/search-select.js, which
+// actually fills the search indexes up with stuff. During build this all
+// gets consumed by src/search.js to make an index, fill it with stuff
+// (as described by search-select.js), and export it to disk; then on
+// the client that export is consumed by src/static/js/search-worker.js,
+// which builds an index in the same shape and imports the data for query.
+
+const baselineStore = [
+  'primaryName',
+  'disambiguator',
+  'artwork',
+  'color',
+];
+
+const genericStore = baselineStore;
+
+const searchShape = {
+  generic: {
+    index: [
+      'primaryName',
+      'parentName',
+      'artTags',
+      'additionalNames',
+      'contributors',
+      'groups',
+    ].map(field => ({field, tokenize: 'forward'})),
+
+    store: genericStore,
+  },
+
+  verbatim: {
+    index: [
+      'primaryName',
+      'parentName',
+      'artTags',
+      'additionalNames',
+      'contributors',
+      'groups',
+    ],
+
+    store: genericStore,
+  },
+};
+
+export default searchShape;
+
+export function makeSearchIndex(descriptor, {FlexSearch}) {
+  return new FlexSearch.Document({
+    id: 'reference',
+    index: descriptor.index,
+    store: descriptor.store,
+
+    // Disable scoring, always return results according to provided order
+    // (not specified in this file; presumably by src/search-select.js).
+    resolution: 1,
+  });
+}
diff --git a/src/common-util/search-spec.js b/src/common-util/search-spec.js
deleted file mode 100644
index 75de0d16..00000000
--- a/src/common-util/search-spec.js
+++ /dev/null
@@ -1,259 +0,0 @@
-// Index structures shared by client and server, and relevant interfaces.
-
-function getArtworkPath(thing) {
-  switch (thing.constructor[Symbol.for('Thing.referenceType')]) {
-    case 'album': {
-      return [
-        'media.albumCover',
-        thing.directory,
-        thing.coverArtFileExtension,
-      ];
-    }
-
-    case 'flash': {
-      return [
-        'media.flashArt',
-        thing.directory,
-        thing.coverArtFileExtension,
-      ];
-    }
-
-    case 'track': {
-      if (thing.hasUniqueCoverArt) {
-        return [
-          'media.trackCover',
-          thing.album.directory,
-          thing.directory,
-          thing.coverArtFileExtension,
-        ];
-      } else if (thing.album.hasCoverArt) {
-        return [
-          'media.albumCover',
-          thing.album.directory,
-          thing.album.coverArtFileExtension,
-        ];
-      } else {
-        return null;
-      }
-    }
-
-    default:
-      return null;
-  }
-}
-
-function prepareArtwork(thing, {
-  checkIfImagePathHasCachedThumbnails,
-  getThumbnailEqualOrSmaller,
-  urls,
-}) {
-  const hasWarnings =
-    thing.artTags?.some(artTag => artTag.isContentWarning);
-
-  const artworkPath =
-    getArtworkPath(thing);
-
-  if (!artworkPath) {
-    return undefined;
-  }
-
-  const mediaSrc =
-    urls
-      .from('media.root')
-      .to(...artworkPath);
-
-  if (!checkIfImagePathHasCachedThumbnails(mediaSrc)) {
-    return undefined;
-  }
-
-  const selectedSize =
-    getThumbnailEqualOrSmaller(
-      (hasWarnings ? 'mini' : 'adorb'),
-      mediaSrc);
-
-  const mediaSrcJpeg =
-    mediaSrc.replace(/\.(png|jpg)$/, `.${selectedSize}.jpg`);
-
-  const displaySrc =
-    urls
-      .from('thumb.root')
-      .to('thumb.path', mediaSrcJpeg);
-
-  const serializeSrc =
-    displaySrc.replace(thing.directory, '<>');
-
-  return serializeSrc;
-}
-
-export const searchSpec = {
-  generic: {
-    query: ({
-      albumData,
-      artTagData,
-      artistData,
-      flashData,
-      groupData,
-      trackData,
-    }) => [
-      albumData,
-
-      artTagData,
-
-      artistData
-        .filter(artist => !artist.isAlias),
-
-      flashData,
-
-      groupData,
-
-      trackData
-        // Exclude rereleases - there's no reasonable way to differentiate
-        // them from the main release as part of this query.
-        .filter(track => !track.mainReleaseTrack),
-    ].flat(),
-
-    process(thing, opts) {
-      const fields = {};
-
-      fields.primaryName =
-        thing.name;
-
-      const kind =
-        thing.constructor[Symbol.for('Thing.referenceType')];
-
-      fields.parentName =
-        (kind === 'track'
-          ? thing.album.name
-       : kind === 'group'
-          ? thing.category.name
-       : kind === 'flash'
-          ? thing.act.name
-          : null);
-
-      fields.color =
-        thing.color;
-
-      fields.artTags =
-        (thing.constructor.hasPropertyDescriptor('artTags')
-          ? thing.artTags.map(artTag => artTag.nameShort)
-          : []);
-
-      fields.additionalNames =
-        (thing.constructor.hasPropertyDescriptor('additionalNames')
-          ? thing.additionalNames.map(entry => entry.name)
-       : thing.constructor.hasPropertyDescriptor('aliasNames')
-          ? thing.aliasNames
-          : []);
-
-      const contribKeys = [
-        'artistContribs',
-        'bannerArtistContribs',
-        'contributorContribs',
-        'coverArtistContribs',
-        'wallpaperArtistContribs',
-      ];
-
-      const contributions =
-        contribKeys
-          .filter(key => Object.hasOwn(thing, key))
-          .flatMap(key => thing[key]);
-
-      fields.contributors =
-        contributions
-          .flatMap(({artist}) => [
-            artist.name,
-            ...artist.aliasNames,
-          ]);
-
-      const groups =
-         (Object.hasOwn(thing, 'groups')
-           ? thing.groups
-        : Object.hasOwn(thing, 'album')
-           ? thing.album.groups
-           : []);
-
-      const mainContributorNames =
-        contributions
-          .map(({artist}) => artist.name);
-
-      fields.groups =
-        groups
-          .filter(group => !mainContributorNames.includes(group.name))
-          .map(group => group.name);
-
-      fields.artwork =
-        prepareArtwork(thing, opts);
-
-      return fields;
-    },
-
-    index: [
-      'primaryName',
-      'parentName',
-      'artTags',
-      'additionalNames',
-      'contributors',
-      'groups',
-    ],
-
-    store: [
-      'primaryName',
-      'artwork',
-      'color',
-    ],
-  },
-};
-
-export function makeSearchIndex(descriptor, {FlexSearch}) {
-  return new FlexSearch.Document({
-    id: 'reference',
-    index: descriptor.index,
-    store: descriptor.store,
-  });
-}
-
-// TODO: This function basically mirrors bind-utilities.js, which isn't
-// exactly robust, but... binding might need some more thought across the
-// codebase in *general.*
-function bindSearchUtilities({
-  checkIfImagePathHasCachedThumbnails,
-  getThumbnailEqualOrSmaller,
-  thumbsCache,
-  urls,
-}) {
-  const bound = {
-    urls,
-  };
-
-  bound.checkIfImagePathHasCachedThumbnails =
-    (imagePath) =>
-      checkIfImagePathHasCachedThumbnails(imagePath, thumbsCache);
-
-  bound.getThumbnailEqualOrSmaller =
-    (preferred, imagePath) =>
-      getThumbnailEqualOrSmaller(preferred, imagePath, thumbsCache);
-
-  return bound;
-}
-
-export function populateSearchIndex(index, descriptor, opts) {
-  const {wikiData} = opts;
-  const bound = bindSearchUtilities(opts);
-
-  const collection = descriptor.query(wikiData);
-
-  for (const thing of collection) {
-    const reference = thing.constructor.getReference(thing);
-
-    let processed;
-    try {
-      processed = descriptor.process(thing, bound);
-    } catch (caughtError) {
-      throw new Error(
-        `Failed to process searchable thing ${reference}`,
-        {cause: caughtError});
-    }
-
-    index.add({reference, ...processed});
-  }
-}
diff --git a/src/common-util/sort.js b/src/common-util/sort.js
index d93d94c1..bbe4e551 100644
--- a/src/common-util/sort.js
+++ b/src/common-util/sort.js
@@ -370,11 +370,12 @@ export function sortAlbumsTracksChronologically(data, {
   getDate,
 } = {}) {
   // Sort albums before tracks...
-  sortByConditions(data, [(t) => t.album === undefined]);
+  sortByConditions(data, [t => t.isAlbum]);
 
-  // Group tracks by album...
-  sortByDirectory(data, {
-    getDirectory: (t) => (t.album ? t.album.directory : t.directory),
+  // Put albums alphabetically, and group tracks with their albums...
+  sortAlphabetically(data, {
+    getDirectory: t => t.isTrack ? t.album.directory : t.directory,
+    getName: t => t.isTrack ? t.album.name : t.name,
   });
 
   // Sort tracks by position in album...
diff --git a/src/common-util/sugar.js b/src/common-util/sugar.js
index 66e160aa..354cf5cc 100644
--- a/src/common-util/sugar.js
+++ b/src/common-util/sugar.js
@@ -70,6 +70,16 @@ export function pick(array) {
   return array[Math.floor(Math.random() * array.length)];
 }
 
+// Gets the only item in a single-item array (strictly, length === 1).
+// If the array has more than one item, or is empty, this is null.
+export function onlyItem(array) {
+  if (array.length === 1) {
+    return array[0];
+  } else {
+    return null;
+  }
+}
+
 // Gets the item at an index relative to another index.
 export function atOffset(array, index, offset, {
   wrap = false,
@@ -116,10 +126,14 @@ export function findIndexOrEnd(array, fn) {
 // returns null (or values in the array are nullish), they'll just be skipped in
 // the sum.
 export function accumulateSum(array, fn = x => x) {
+  if (!Array.isArray(array)) {
+    return accumulateSum(Array.from(array, fn));
+  }
+
   return array.reduce(
     (accumulator, value, index, array) =>
       accumulator +
-        fn(value, index, array) ?? 0,
+      (fn(value, index, array) ?? 0),
     0);
 }
 
@@ -221,6 +235,9 @@ export const compareArrays = (arr1, arr2, {checkOrder = true} = {}) =>
     ? arr1.every((x, i) => arr2[i] === x)
     : arr1.every((x) => arr2.includes(x)));
 
+export const exhaust = (generatorFunction) =>
+  Array.from(generatorFunction());
+
 export function compareObjects(obj1, obj2, {
   checkOrder = false,
   checkSymbols = true,
@@ -251,11 +268,20 @@ export function compareObjects(obj1, obj2, {
 
 // Stolen from jq! Which pro8a8ly stole the concept from other places. Nice.
 export const withEntries = (obj, fn) => {
-  const result = fn(Object.entries(obj));
-  if (result instanceof Promise) {
-    return result.then(entries => Object.fromEntries(entries));
+  if (obj instanceof Map) {
+    const result = fn(Array.from(obj.entries()));
+    if (result instanceof Promise) {
+      return result.then(entries => new Map(entries));
+    } else {
+      return new Map(result);
+    }
   } else {
-    return Object.fromEntries(result);
+    const result = fn(Object.entries(obj));
+    if (result instanceof Promise) {
+      return result.then(entries => Object.fromEntries(entries));
+    } else {
+      return Object.fromEntries(result);
+    }
   }
 }
 
@@ -299,34 +325,74 @@ export function filterProperties(object, properties, {
   return filteredObject;
 }
 
-export function queue(array, max = 50) {
-  if (max === 0) {
-    return array.map((fn) => fn());
+export function queue(functionList, queueSize = 50) {
+  if (queueSize === 0) {
+    return functionList.map(fn => fn());
   }
 
-  const begin = [];
-  let current = 0;
-  const ret = array.map(
-    (fn) =>
-      new Promise((resolve, reject) => {
-        begin.push(() => {
-          current++;
-          Promise.resolve(fn()).then((value) => {
-            current--;
-            if (current < max && begin.length) {
-              begin.shift()();
-            }
-            resolve(value);
-          }, reject);
-        });
-      })
-  );
+  const promiseList = [];
+  const resolveList = [];
+  const rejectList = [];
 
-  for (let i = 0; i < max && begin.length; i++) {
-    begin.shift()();
+  for (let i = 0; i < functionList.length; i++) {
+    const promiseWithResolvers = Promise.withResolvers();
+    promiseList.push(promiseWithResolvers.promise);
+    resolveList.push(promiseWithResolvers.resolve);
+    rejectList.push(promiseWithResolvers.reject);
   }
 
-  return ret;
+  let cursor = 0;
+  let running = 0;
+
+  const next = async () => {
+    if (running >= queueSize) {
+      return;
+    }
+
+    if (cursor === functionList.length) {
+      return;
+    }
+
+    const thisFunction = functionList[cursor];
+    const thisResolve = resolveList[cursor];
+    const thisReject = rejectList[cursor];
+
+    delete functionList[cursor];
+    delete resolveList[cursor];
+    delete rejectList[cursor];
+
+    cursor++;
+    running++;
+
+    try {
+      thisResolve(await thisFunction());
+    } catch (error) {
+      thisReject(error);
+    } finally {
+      running--;
+
+      // If the cursor is at 1, this is the first promise that settled,
+      // so we're now done with the "kick start", and can start the remaining
+      // promises (up to queueSize).
+      if (cursor === 1) {
+        // Since only one promise is used for the "kick start", and that one
+        // has just resolved, we know there's none running at all right now,
+        // and can start as many as specified in the queueSize right away.
+        for (let i = 0; i < queueSize; i++) {
+          next();
+        }
+      } else {
+        next();
+      }
+    }
+  };
+
+  // Only start a single promise, as a "kick start", so that it resolves as
+  // early as possible (it will resolve before we use CPU to start the rest
+  // of the promises, up to queueSize).
+  next();
+
+  return promiseList;
 }
 
 export function delay(ms) {
@@ -357,15 +423,23 @@ export function splitKeys(key) {
 
 // Follows a key path like 'foo.bar.baz' to get an item nested deeply inside
 // an object. If a value partway through the chain is an array, the values
-// down the rest of the chain are gotten for each item in the array.
+// down the rest of the chain are gotten for each item in the array. If a value
+// partway through the chain is missing the next key, the chain stops and is
+// undefined (or null) at that point.
 //
 // obj: {x: [{y: ['a']}, {y: ['b', 'c']}]}
 // key: 'x.y'
 //   -> [['a'], ['b', 'c']]
 //
+// obj: {x: [{y: ['a']}, {y: ['b', 'c']}, {z: ['d', 'e']}]}
+// key: 'x.z'
+//   -> [undefined, undefined, ['d', 'e']]
+//
 export function getNestedProp(obj, key) {
   const recursive = (o, k) =>
-    (k.length === 1
+    (o === undefined || o === null
+      ? o
+   : k.length === 1
       ? o[k[0]]
    : Array.isArray(o[k[0]])
       ? o[k[0]].map(v => recursive(v, k.slice(1)))
diff --git a/src/common-util/wiki-data.js b/src/common-util/wiki-data.js
index a4c6b3bd..1668f110 100644
--- a/src/common-util/wiki-data.js
+++ b/src/common-util/wiki-data.js
@@ -11,7 +11,7 @@ export {filterMultipleArrays} from './sugar.js';
 
 // Generic value operations
 
-export function getKebabCase(name) {
+export function getCaseSensitiveKebabCase(name) {
   return name
 
     // Spaces to dashes
@@ -34,6 +34,9 @@ export function getKebabCase(name) {
     // General punctuation which always separates surrounding words
     .replace(/[/@#$%*()_=,[\]{}|\\;:<>?`~]/g, '-')
 
+    // More punctuation which always separates surrounding words
+    .replace(/[\u{2013}-\u{2014}]/gu, '-') // En Dash, Em Dash
+
     // Accented characters
     .replace(/[áâäàå]/gi, 'a')
     .replace(/[çč]/gi, 'c')
@@ -50,17 +53,17 @@ export function getKebabCase(name) {
 
     // Trim dashes on boundaries
     .replace(/^-+|-+$/g, '')
+}
 
-    // Always lowercase
-    .toLowerCase();
+export function getKebabCase(name) {
+  return getCaseSensitiveKebabCase(name).toLowerCase();
 }
 
 // Specific data utilities
 
-// Matches heading details from commentary data in roughly the formats:
+// Matches heading details from commentary data in roughly the format:
 //
-//    <i>artistReference:</i> (annotation, date)
-//    <i>artistReference|artistDisplayText:</i> (annotation, date)
+//    <i>artistText:</i> (annotation, date)
 //
 // where capturing group "annotation" can be any text at all, except that the
 // last entry (past a comma or the only content within parentheses), if parsed
@@ -83,8 +86,9 @@ export function getKebabCase(name) {
 // parentheses can be part of the actual annotation content.
 //
 // Capturing group "artistReference" is all the characters between <i> and </i>
-// (apart from the pipe and "artistDisplayText" text, if present), and is either
-// the name of an artist or an "artist:directory"-style reference.
+// (apart from the pipe and the "artistText" group, if present), and is either
+// the name of one or more artist or "artist:directory"-style references,
+// joined by commas, if multiple.
 //
 // This regular expression *doesn't* match bodies, which will need to be parsed
 // out of the original string based on the indices matched using this.
@@ -94,7 +98,7 @@ const dateRegex = groupName =>
   String.raw`(?<${groupName}>[a-zA-Z]+ [0-9]{1,2}, [0-9]{4,4}|[0-9]{1,2} [^,]*[0-9]{4,4}|[0-9]{1,4}[-/][0-9]{1,4}[-/][0-9]{1,4})`;
 
 const commentaryRegexRaw =
-  String.raw`^<i>(?<artistReferences>.+?)(?:\|(?<artistDisplayText>.+))?:<\/i>(?: \((?<annotation>(?:.*?(?=,|\)[^)]*$))*?)(?:,? ?(?:(?<dateKind>sometime|throughout|around) )?${dateRegex('date')}(?: ?- ?${dateRegex('secondDate')})?(?: (?<accessKind>captured|accessed) ${dateRegex('accessDate')})?)?\))?`;
+  String.raw`^<i>(?<artistText>.+?):<\/i>(?: \((?<annotation>(?:.*?(?=,|\)[^)]*$))*?)(?:,? ?(?:(?<dateKind>sometime|throughout|around) )?${dateRegex('date')}(?: ?- ?${dateRegex('secondDate')})?(?: (?<accessKind>captured|accessed) ${dateRegex('accessDate')})?)?\))?`;
 export const commentaryRegexCaseInsensitive =
   new RegExp(commentaryRegexRaw, 'gmi');
 export const commentaryRegexCaseSensitive =
@@ -102,6 +106,8 @@ export const commentaryRegexCaseSensitive =
 export const commentaryRegexCaseSensitiveOneShot =
   new RegExp(commentaryRegexRaw);
 
+export const languageOptionRegex = /{(?<name>[A-Z0-9_]+)}/g;
+
 // The #validators function isOldStyleLyrics() describes
 // what this regular expression detects against.
 export const multipleLyricsDetectionRegex =
@@ -113,10 +119,16 @@ export function matchContentEntries(sourceText) {
   let previousMatchEntry = null;
   let previousEndIndex = null;
 
+  const trimBody = body =>
+    body
+      .replace(/^\n*/, '')
+      .replace(/\n*$/, '');
+
   for (const {0: matchText, index: startIndex, groups: matchEntry}
           of sourceText.matchAll(commentaryRegexCaseSensitive)) {
     if (previousMatchEntry) {
-      previousMatchEntry.body = sourceText.slice(previousEndIndex, startIndex);
+      previousMatchEntry.body =
+        trimBody(sourceText.slice(previousEndIndex, startIndex));
     }
 
     matchEntries.push(matchEntry);
@@ -126,7 +138,8 @@ export function matchContentEntries(sourceText) {
   }
 
   if (previousMatchEntry) {
-    previousMatchEntry.body = sourceText.slice(previousEndIndex);
+    previousMatchEntry.body =
+      trimBody(sourceText.slice(previousEndIndex));
   }
 
   return matchEntries;
@@ -522,3 +535,55 @@ export function combineWikiDataArrays(arrays) {
     return combined;
   }
 }
+
+// Markdown stuff
+
+export function* matchMarkdownLinks(markdownSource, {marked}) {
+  const plausibleLinkRegexp = /\[(?=.*?\))/g;
+
+  const lexer = new marked.Lexer();
+
+  // This is just an optimization. Don't let Marked try to process tokens
+  // recursively, i.e. within the text/label of the link. We only care about
+  // the text itself, as a string.
+  lexer.inlineTokens = x => [];
+
+  // This is cheating, because the lexer's tokenizer is a private property,
+  // but we can apparently access it anyway.
+  const {tokenizer} = lexer;
+
+  let plausibleMatch = null;
+  while (plausibleMatch = plausibleLinkRegexp.exec(markdownSource)) {
+    const {index} = plausibleMatch;
+
+    const definiteMatch =
+      tokenizer.link(markdownSource.slice(index));
+
+    if (!definiteMatch) {
+      continue;
+    }
+
+    const {raw: {length}, text: label, href} = definiteMatch;
+
+    yield {label, href, index, length};
+  }
+}
+
+export function* matchInlineLinks(source) {
+  const plausibleLinkRegexp = /\b[a-z]*:\/\/[^ ]*?(?=(?:[,.!?]*)(?:\s|$))/gm;
+
+  let plausibleMatch = null;
+  while (plausibleMatch = plausibleLinkRegexp.exec(source)) {
+    const [href] = plausibleMatch;
+    const {index} = plausibleMatch;
+    const [{length}] = plausibleMatch;
+
+    try {
+      new URL(href);
+    } catch {
+      continue;
+    }
+
+    yield {href, length, index};
+  }
+}