« get me outta code hell

hsmusic-wiki - HSMusic - static wiki software cataloguing collaborative creation
about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/common-util/search-spec.js 214
-rw-r--r-- src/static/js/client/sidebar-search.js 36
-rw-r--r-- src/static/js/search-worker.js 174
3 files changed, 272 insertions, 152 deletions
diff --git a/src/common-util/search-spec.js b/src/common-util/search-spec.js
index 43d27846..af5ec201 100644
--- a/src/common-util/search-spec.js
+++ b/src/common-util/search-spec.js
@@ -85,104 +85,116 @@ function prepareArtwork(thing, {
   return serializeSrc;
 }
 
-export const searchSpec = {
-  generic: {
-    query: ({
-      albumData,
-      artTagData,
-      artistData,
-      flashData,
-      groupData,
-      trackData,
-    }) => [
-      albumData,
-
-      artTagData,
-
-      artistData
-        .filter(artist => !artist.isAlias),
-
-      flashData,
-
-      groupData,
-
-      trackData
-        // Exclude rereleases - there's no reasonable way to differentiate
-        // them from the main release as part of this query.
-        .filter(track => !track.mainReleaseTrack),
-    ].flat(),
-
-    process(thing, opts) {
-      const fields = {};
-
-      fields.primaryName =
-        thing.name;
-
-      const kind =
-        thing.constructor[Symbol.for('Thing.referenceType')];
-
-      fields.parentName =
-        (kind === 'track'
-          ? thing.album.name
-       : kind === 'group'
-          ? thing.category.name
-       : kind === 'flash'
-          ? thing.act.name
-          : null);
-
-      fields.color =
-        thing.color;
-
-      fields.artTags =
-        (thing.constructor.hasPropertyDescriptor('artTags')
-          ? thing.artTags.map(artTag => artTag.nameShort)
-          : []);
-
-      fields.additionalNames =
-        (thing.constructor.hasPropertyDescriptor('additionalNames')
-          ? thing.additionalNames.map(entry => entry.name)
-       : thing.constructor.hasPropertyDescriptor('aliasNames')
-          ? thing.aliasNames
-          : []);
-
-      const contribKeys = [
-        'artistContribs',
-        'contributorContribs',
-      ];
+function baselineProcess(thing, opts) {
+  const fields = {};
+
+  fields.primaryName =
+    thing.name;
 
-      const contributions =
-        contribKeys
-          .filter(key => Object.hasOwn(thing, key))
-          .flatMap(key => thing[key]);
+  fields.artwork =
+    prepareArtwork(thing, opts);
+
+  fields.color =
+    thing.color;
+
+  return fields;
+}
 
-      fields.contributors =
-        contributions
-          .flatMap(({artist}) => [
-            artist.name,
-            ...artist.aliasNames,
-          ]);
+const baselineStore = [
+  'primaryName',
+  'artwork',
+  'color',
+];
 
-      const groups =
-         (Object.hasOwn(thing, 'groups')
-           ? thing.groups
-        : Object.hasOwn(thing, 'album')
-           ? thing.album.groups
-           : []);
+function genericQuery(wikiData) {
+  return [
+    wikiData.albumData,
 
-      const mainContributorNames =
-        contributions
-          .map(({artist}) => artist.name);
+    wikiData.artTagData,
 
-      fields.groups =
-        groups
-          .filter(group => !mainContributorNames.includes(group.name))
-          .map(group => group.name);
+    wikiData.artistData
+      .filter(artist => !artist.isAlias),
+
+    wikiData.flashData,
+
+    wikiData.groupData,
+
+    wikiData.trackData
+      // Exclude rereleases - there's no reasonable way to differentiate
+      // them from the main release as part of this query.
+      .filter(track => !track.mainReleaseTrack),
+  ].flat();
+}
+
+function genericProcess(thing, opts) {
+  const fields = baselineProcess(thing, opts);
+
+  const kind =
+    thing.constructor[Symbol.for('Thing.referenceType')];
+
+  fields.parentName =
+    (kind === 'track'
+      ? thing.album.name
+   : kind === 'group'
+      ? thing.category.name
+   : kind === 'flash'
+      ? thing.act.name
+      : null);
+
+  fields.artTags =
+    (thing.constructor.hasPropertyDescriptor('artTags')
+      ? thing.artTags.map(artTag => artTag.nameShort)
+      : []);
+
+  fields.additionalNames =
+    (thing.constructor.hasPropertyDescriptor('additionalNames')
+      ? thing.additionalNames.map(entry => entry.name)
+   : thing.constructor.hasPropertyDescriptor('aliasNames')
+      ? thing.aliasNames
+      : []);
+
+  const contribKeys = [
+    'artistContribs',
+    'contributorContribs',
+  ];
+
+  const contributions =
+    contribKeys
+      .filter(key => Object.hasOwn(thing, key))
+      .flatMap(key => thing[key]);
+
+  fields.contributors =
+    contributions
+      .flatMap(({artist}) => [
+        artist.name,
+        ...artist.aliasNames,
+      ]);
+
+  const groups =
+     (Object.hasOwn(thing, 'groups')
+       ? thing.groups
+    : Object.hasOwn(thing, 'album')
+       ? thing.album.groups
+       : []);
+
+  const mainContributorNames =
+    contributions
+      .map(({artist}) => artist.name);
+
+  fields.groups =
+    groups
+      .filter(group => !mainContributorNames.includes(group.name))
+      .map(group => group.name);
+
+  return fields;
+}
 
-      fields.artwork =
-        prepareArtwork(thing, opts);
+const genericStore = baselineStore;
 
-      return fields;
-    },
+export const searchSpec = {
+  generic: {
+    query: genericQuery,
+    process: genericProcess,
 
     index: [
       'primaryName',
@@ -191,13 +203,25 @@ export const searchSpec = {
       'additionalNames',
       'contributors',
       'groups',
-    ],
+    ].map(field => ({field, tokenize: 'forward'})),
 
-    store: [
+    store: genericStore,
+  },
+
+  verbatim: {
+    query: genericQuery,
+    process: genericProcess,
+
+    index: [
       'primaryName',
-      'artwork',
-      'color',
+      'parentName',
+      'artTags',
+      'additionalNames',
+      'contributors',
+      'groups',
     ],
+
+    store: genericStore,
   },
 };
 
diff --git a/src/static/js/client/sidebar-search.js b/src/static/js/client/sidebar-search.js
index 42267a9a..b6008d28 100644
--- a/src/static/js/client/sidebar-search.js
+++ b/src/static/js/client/sidebar-search.js
@@ -772,7 +772,7 @@ function showSidebarSearchFailed() {
 function showSidebarSearchResults(results) {
   const {session} = info;
 
-  console.debug(`Showing search results:`, flattenResults(results));
+  console.debug(`Showing search results:`, tidyResults(results));
 
   showSearchSidebarColumn();
 
@@ -805,31 +805,27 @@ function showSidebarSearchResults(results) {
   restoreSidebarSearchResultsScrollOffset();
 }
 
-function flattenResults(results) {
-  const flatResults =
-    Object.entries(results)
-      .filter(([index]) => index === 'generic')
-      .flatMap(([index, results]) => results
-        .flatMap(({doc, id}) => ({
-          index,
-          reference: id ?? null,
-          referenceType: (id ? id.split(':')[0] : null),
-          directory: (id ? id.split(':')[1] : null),
-          data: doc,
-        })));
-
-  return flatResults;
+function tidyResults(results) {
+  const tidiedResults =
+    results.map(({doc, id}) => ({
+      reference: id ?? null,
+      referenceType: (id ? id.split(':')[0] : null),
+      directory: (id ? id.split(':')[1] : null),
+      data: doc,
+    }));
+
+  return tidiedResults;
 }
 
 function fillResultElements(results, {
   filterType = null,
 } = {}) {
-  const flatResults = flattenResults(results);
+  const tidiedResults = tidyResults(results);
 
   const filteredResults =
     (filterType
-      ? flatResults.filter(result => result.referenceType === filterType)
-      : flatResults);
+      ? tidiedResults.filter(result => result.referenceType === filterType)
+      : tidiedResults);
 
   while (info.results.firstChild) {
     info.results.firstChild.remove();
@@ -853,10 +849,10 @@ function fillResultElements(results, {
 }
 
 function showFilterElements(results) {
-  const flatResults = flattenResults(results);
+  const tidiedResults = tidyResults(results);
 
   const allReferenceTypes =
-    unique(flatResults.map(result => result.referenceType));
+    unique(tidiedResults.map(result => result.referenceType));
 
   let shownAny = false;
 
diff --git a/src/static/js/search-worker.js b/src/static/js/search-worker.js
index 1b4684ad..5ecb6eb4 100644
--- a/src/static/js/search-worker.js
+++ b/src/static/js/search-worker.js
@@ -371,56 +371,60 @@ function postActionResult(id, status, value) {
 }
 
 function performSearchAction({query, options}) {
-  const {generic, ...otherIndexes} = indexes;
+  const {generic, verbatim} = indexes;
 
   const genericResults =
     queryGenericIndex(generic, query, options);
 
-  const otherResults =
-    withEntries(otherIndexes, entries => entries
-      .map(([indexName, index]) => [
-        indexName,
-        index.search(query, options),
-      ]));
+  const verbatimResults =
+    queryVerbatimIndex(verbatim, query, options);
 
-  return {
-    generic: genericResults,
-    ...otherResults,
-  };
+  const verbatimIDs =
+    new Set(verbatimResults?.map(result => result.id));
+
+  const commonResults =
+    (verbatimResults && genericResults
+      ? genericResults
+          .filter(({id}) => verbatimIDs.has(id))
+      : verbatimResults ?? genericResults);
+
+  return commonResults;
 }
 
-function queryGenericIndex(index, query, options) {
-  const interestingFieldCombinations = [
-    ['primaryName', 'parentName', 'groups'],
-    ['primaryName', 'parentName'],
-    ['primaryName', 'groups', 'contributors'],
-    ['primaryName', 'groups', 'artTags'],
-    ['primaryName', 'groups'],
-    ['primaryName', 'contributors'],
-    ['primaryName', 'artTags'],
-    ['parentName', 'groups', 'artTags'],
-    ['parentName', 'artTags'],
-    ['groups', 'contributors'],
-    ['groups', 'artTags'],
-
-    // This prevents just matching *everything* tagged "john" if you
-    // only search "john", but it actually supports matching more than
-    // *two* tags at once: "john rose lowas" works! This is thanks to
-    // flexsearch matching multiple field values in a single query.
-    ['artTags', 'artTags'],
-
-    ['contributors', 'parentName'],
-    ['contributors', 'groups'],
-    ['primaryName', 'contributors'],
-    ['primaryName'],
-  ];
+const interestingFieldCombinations = [
+  ['primaryName', 'parentName', 'groups'],
+  ['primaryName', 'parentName'],
+  ['primaryName', 'groups', 'contributors'],
+  ['primaryName', 'groups', 'artTags'],
+  ['primaryName', 'groups'],
+  ['primaryName', 'contributors'],
+  ['primaryName', 'artTags'],
+  ['parentName', 'groups', 'artTags'],
+  ['parentName', 'artTags'],
+  ['groups', 'contributors'],
+  ['groups', 'artTags'],
+
+  // This prevents just matching *everything* tagged "john" if you
+  // only search "john", but it actually supports matching more than
+  // *two* tags at once: "john rose lowas" works! This is thanks to
+  // flexsearch matching multiple field values in a single query.
+  ['artTags', 'artTags'],
+
+  ['contributors', 'parentName'],
+  ['contributors', 'groups'],
+  ['primaryName', 'contributors'],
+  ['primaryName'],
+];
 
+function queryGenericIndex(index, query, options) {
   const interestingFields =
     unique(interestingFieldCombinations.flat());
 
   const {genericTerms, queriedKind} =
     processTerms(query);
 
+  if (empty(genericTerms)) return null;
+
   const particles =
     particulate(genericTerms);
 
@@ -499,6 +503,93 @@ function queryGenericIndex(index, query, options) {
   return constitutedAndFiltered;
 }
 
+function queryVerbatimIndex(index, query, options) {
+  const interestingFields =
+    unique(interestingFieldCombinations.flat());
+
+  const {verbatimTerms, queriedKind} =
+    processTerms(query);
+
+  if (empty(verbatimTerms)) return null;
+
+  const particles =
+    particulate(verbatimTerms);
+
+  const groupedParticles =
+    groupArray(particles, ({length}) => length);
+
+  const queriesBy = keys =>
+    (groupedParticles.get(keys.length) ?? [])
+      .flatMap(permutations)
+      .map(values => values.map(({terms}) => terms.join(' ')))
+      .map(values =>
+        stitchArrays({
+          field: keys,
+          query: values,
+        }));
+
+  const boilerplate = queryBoilerplate(index);
+
+  const particleResults =
+    Object.fromEntries(
+      interestingFields.map(field => [
+        field,
+        Object.fromEntries(
+          particles.flat()
+            .map(({terms}) => terms.join(' '))
+            .map(query => [
+              query,
+              new Set(
+                boilerplate
+                  .query(query, {
+                    ...options,
+                    field,
+                    limit: Infinity,
+                  })
+                  .fieldResults[field]),
+            ])),
+      ]));
+
+  const results = new Set();
+
+  for (const interestingFieldCombination of interestingFieldCombinations) {
+    for (const query of queriesBy(interestingFieldCombination)) {
+      const idToMatchingFieldsMap = new Map();
+      for (const {field, query: fieldQuery} of query) {
+        for (const id of particleResults[field][fieldQuery]) {
+          if (idToMatchingFieldsMap.has(id)) {
+            idToMatchingFieldsMap.get(id).push(field);
+          } else {
+            idToMatchingFieldsMap.set(id, [field]);
+          }
+        }
+      }
+
+      const commonAcrossFields =
+        Array.from(idToMatchingFieldsMap.entries())
+          .filter(([id, matchingFields]) =>
+            matchingFields.length === interestingFieldCombination.length)
+          .map(([id]) => id);
+
+      for (const result of commonAcrossFields) {
+        results.add(result);
+      }
+    }
+  }
+
+  const constituted =
+    boilerplate.constitute(results);
+
+  const constitutedAndFiltered =
+    constituted
+      .filter(({id}) =>
+        (queriedKind
+          ? id.split(':')[0] === queriedKind
+          : true));
+
+  return constitutedAndFiltered;
+}
+
 function processTerms(query) {
   const kindTermSpec = [
     {kind: 'album', terms: ['album']},
@@ -510,11 +601,14 @@ function processTerms(query) {
   ];
 
   const genericTerms = [];
+  const verbatimTerms = [];
   let queriedKind = null;
 
   const termRegexp =
     new RegExp(
       String.raw`(?<kind>${kindTermSpec.flatMap(spec => spec.terms).join('|')})` +
+      String.raw`|(?<=^|\s)(?<quote>["'])(?<regularVerbatim>.+?)\k<quote>(?=$|\s)` +
+      String.raw`|(?<=^|\s)[“”‘’](?<curlyVerbatim>.+?)[“”‘’](?=$|\s)` +
       String.raw`|[^\s\-]+`,
       'gi');
 
@@ -530,10 +624,16 @@ function processTerms(query) {
       continue;
     }
 
+    const verbatim = groups.regularVerbatim || groups.curlyVerbatim;
+    if (verbatim) {
+      verbatimTerms.push(verbatim);
+      continue;
+    }
+
     genericTerms.push(match[0]);
   }
 
-  return {genericTerms, queriedKind};
+  return {genericTerms, verbatimTerms, queriedKind};
 }
 
 function particulate(terms) {