« get me outta code hell

hsmusic-wiki - HSMusic - static wiki software cataloguing collaborative creation
about summary refs log tree commit diff
path: root/src/static/js/search-worker.js
diff options
context:
space:
mode:
Diffstat (limited to 'src/static/js/search-worker.js')
-rw-r--r--src/static/js/search-worker.js621
1 files changed, 621 insertions, 0 deletions
diff --git a/src/static/js/search-worker.js b/src/static/js/search-worker.js
new file mode 100644
index 00000000..8d987a74
--- /dev/null
+++ b/src/static/js/search-worker.js
@@ -0,0 +1,621 @@
+/* eslint-env worker */
+
+import FlexSearch from '../lib/flexsearch/flexsearch.bundle.module.min.js';
+
+import {makeSearchIndex, searchSpec} from '../shared-util/search-spec.js';
+
+import {
+  empty,
+  groupArray,
+  promiseWithResolvers,
+  stitchArrays,
+  unique,
+  withEntries,
+} from '../shared-util/sugar.js';
+
+import {loadDependency} from './module-import-shims.js';
+import {fetchWithProgress} from './xhr-util.js';
+
+// Will be loaded from dependencies.
+let decompress;
+let unpack;
+
+let idb;
+
+let status = null;
+let indexes = null;
+
+onmessage = handleWindowMessage;
+onerror = handleRuntimeError;
+onunhandledrejection = handleRuntimeError;
+postStatus('alive');
+
+Promise.all([
+  loadDependencies(),
+  loadDatabase(),
+]).then(main)
+  .then(
+    () => {
+      postStatus('ready');
+    },
+    error => {
+      console.error(`Search worker setup error:`, error);
+      postStatus('setup-error');
+    });
+
+async function loadDependencies() {
+  const {compressJSON} =
+    await loadDependency.fromWindow('../lib/compress-json/bundle.min.js');
+
+  const msgpackr =
+    await loadDependency.fromModuleExports('../lib/msgpackr/index.js');
+
+  ({decompress} = compressJSON);
+  ({unpack} = msgpackr);
+}
+
+async function promisifyIDBRequest(request) {
+  const {promise, resolve, reject} = promiseWithResolvers();
+
+  request.addEventListener('success', () => resolve(request.result));
+  request.addEventListener('error', () => reject(request.error));
+
+  return promise;
+}
+
+async function* iterateIDBObjectStore(store, query) {
+  const request =
+    store.openCursor(query);
+
+  let promise, resolve, reject;
+  let cursor;
+
+  request.onsuccess = () => {
+    cursor = request.result;
+    if (cursor) {
+      resolve({done: false, value: [cursor.key, cursor.value]});
+    } else {
+      resolve({done: true});
+    }
+  };
+
+  request.onerror = () => {
+    reject(request.error);
+  };
+
+  do {
+    ({promise, resolve, reject} = promiseWithResolvers());
+
+    const result = await promise;
+
+    if (result.done) {
+      return;
+    }
+
+    yield result.value;
+
+    cursor.continue();
+  } while (true);
+}
+
+async function loadCachedIndexFromIDB() {
+  if (!idb) return null;
+
+  const transaction =
+    idb.transaction(['indexes'], 'readwrite');
+
+  const store =
+    transaction.objectStore('indexes');
+
+  const result = {};
+
+  for await (const [key, object] of iterateIDBObjectStore(store)) {
+    result[key] = object;
+  }
+
+  return result;
+}
+
+async function loadDatabase() {
+  const request =
+    globalThis.indexedDB.open('hsmusicSearchDatabase', 4);
+
+  request.addEventListener('upgradeneeded', () => {
+    const idb = request.result;
+
+    idb.createObjectStore('indexes', {
+      keyPath: 'key',
+    });
+  });
+
+  try {
+    idb = await promisifyIDBRequest(request);
+  } catch (error) {
+    console.warn(`Couldn't load search IndexedDB - won't use an internal cache.`);
+    console.warn(request.error);
+    idb = null;
+  }
+}
+
+function rebase(path) {
+  return `/search-data/` + path;
+}
+
+async function prepareIndexData() {
+  return Promise.all([
+    fetch(rebase('index.json'))
+      .then(resp => resp.json()),
+
+    loadCachedIndexFromIDB(),
+  ]).then(
+      ([indexData, idbIndexData]) =>
+        ({indexData, idbIndexData}));
+}
+
+function fetchIndexes(keysNeedingFetch) {
+  if (!empty(keysNeedingFetch)) {
+    postMessage({
+      kind: 'download-begun',
+      context: 'search-indexes',
+      keys: keysNeedingFetch,
+    });
+  }
+
+  return (
+    keysNeedingFetch.map(key =>
+      fetchWithProgress(
+        rebase(key + '.json.msgpack'),
+        progress => {
+          postMessage({
+            kind: 'download-progress',
+            context: 'search-indexes',
+            progress: progress / 1.00,
+            key,
+          });
+        }).then(response => {
+            postMessage({
+              kind: 'download-complete',
+              context: 'search-indexes',
+              key,
+            });
+
+            return response;
+          })));
+}
+
+async function main() {
+  const prepareIndexDataPromise = prepareIndexData();
+
+  indexes =
+    withEntries(searchSpec, entries => entries
+      .map(([key, descriptor]) => [
+        key,
+        makeSearchIndex(descriptor, {FlexSearch}),
+      ]));
+
+  const {indexData, idbIndexData} = await prepareIndexDataPromise;
+
+  const keysNeedingFetch =
+    (idbIndexData
+      ? Object.keys(indexData)
+          .filter(key =>
+            indexData[key].md5 !==
+            idbIndexData[key]?.md5)
+      : Object.keys(indexData));
+
+  const keysFromCache =
+    Object.keys(indexData)
+      .filter(key => !keysNeedingFetch.includes(key))
+
+  const cacheArrayBufferPromises =
+    keysFromCache
+      .map(key => idbIndexData[key])
+      .map(({cachedBinarySource}) =>
+        cachedBinarySource.arrayBuffer());
+
+  const fetchPromises =
+    fetchIndexes(keysNeedingFetch);
+
+  const fetchBlobPromises =
+    fetchPromises
+      .map(promise => promise
+        .then(response => response.blob()));
+
+  const fetchArrayBufferPromises =
+    fetchBlobPromises
+      .map(promise => promise
+        .then(blob => blob.arrayBuffer()));
+
+  function arrayBufferToJSON(data) {
+    data = new Uint8Array(data);
+    data = unpack(data);
+    data = decompress(data);
+    return data;
+  }
+
+  function importIndexes(keys, jsons) {
+    stitchArrays({key: keys, json: jsons})
+      .forEach(({key, json}) => {
+        importIndex(key, json);
+      });
+  }
+
+  if (idb) {
+    console.debug(`Reusing indexes from search cache:`, keysFromCache);
+    console.debug(`Fetching indexes anew:`, keysNeedingFetch);
+  }
+
+  await Promise.all([
+    async () => {
+      const cacheArrayBuffers =
+        await Promise.all(cacheArrayBufferPromises);
+
+      const cacheJSONs =
+        cacheArrayBuffers
+          .map(arrayBufferToJSON);
+
+      importIndexes(keysFromCache, cacheJSONs);
+    },
+
+    async () => {
+      const fetchArrayBuffers =
+        await Promise.all(fetchArrayBufferPromises);
+
+      const fetchJSONs =
+        fetchArrayBuffers
+          .map(arrayBufferToJSON);
+
+      importIndexes(keysNeedingFetch, fetchJSONs);
+    },
+
+    async () => {
+      if (!idb) return;
+
+      const fetchBlobs =
+        await Promise.all(fetchBlobPromises);
+
+      const transaction =
+        idb.transaction(['indexes'], 'readwrite');
+
+      const store =
+        transaction.objectStore('indexes');
+
+      for (const {key, blob} of stitchArrays({
+        key: keysNeedingFetch,
+        blob: fetchBlobs,
+      })) {
+        const value = {
+          key,
+          md5: indexData[key].md5,
+          cachedBinarySource: blob,
+        };
+
+        try {
+          await promisifyIDBRequest(store.put(value));
+        } catch (error) {
+          console.warn(`Error saving ${key} to internal search cache:`, value);
+          console.warn(error);
+          continue;
+        }
+      }
+    },
+  ].map(fn => fn()));
+}
+
+function importIndex(indexKey, indexData) {
+  // If this fails, it's because an outdated index was cached.
+  // TODO: If this fails, try again once with a cache busting url.
+  for (const [key, value] of Object.entries(indexData)) {
+    indexes[indexKey].import(key, JSON.stringify(value));
+  }
+}
+
+function handleRuntimeError() {
+  postStatus('runtime-error');
+}
+
+function handleWindowMessage(message) {
+  switch (message.data.kind) {
+    case 'action':
+      handleWindowActionMessage(message);
+      break;
+
+    default:
+      console.warn(`Unknown message kind -> to search worker:`, message.data);
+      break;
+  }
+}
+
+async function handleWindowActionMessage(message) {
+  const {id} = message.data;
+
+  if (!id) {
+    console.warn(`Action without id -> to search worker:`, message.data);
+    return;
+  }
+
+  if (status !== 'ready') {
+    return postActionResult(id, 'reject', 'not ready');
+  }
+
+  let value;
+
+  switch (message.data.action) {
+    case 'search':
+      value = await performSearchAction(message.data.options);
+      break;
+
+    default:
+      console.warn(`Unknown action "${message.data.action}" -> to search worker:`, message.data);
+      return postActionResult(id, 'reject', 'unknown action');
+  }
+
+  await postActionResult(id, 'resolve', value);
+}
+
+function postStatus(newStatus) {
+  status = newStatus;
+  postMessage({
+    kind: 'status',
+    status: newStatus,
+  });
+}
+
+function postActionResult(id, status, value) {
+  postMessage({
+    kind: 'result',
+    id,
+    status,
+    value,
+  });
+}
+
+function performSearchAction({query, options}) {
+  const {generic, ...otherIndexes} = indexes;
+
+  const genericResults =
+    queryGenericIndex(generic, query, options);
+
+  const otherResults =
+    withEntries(otherIndexes, entries => entries
+      .map(([indexName, index]) => [
+        indexName,
+        index.search(query, options),
+      ]));
+
+  return {
+    generic: genericResults,
+    ...otherResults,
+  };
+}
+
+function queryGenericIndex(index, query, options) {
+  const interestingFieldCombinations = [
+    ['primaryName', 'parentName', 'groups'],
+    ['primaryName', 'parentName'],
+    ['primaryName', 'groups', 'contributors'],
+    ['primaryName', 'groups', 'artTags'],
+    ['primaryName', 'groups'],
+    ['primaryName', 'contributors'],
+    ['primaryName', 'artTags'],
+    ['parentName', 'groups', 'artTags'],
+    ['parentName', 'artTags'],
+    ['groups', 'contributors'],
+    ['groups', 'artTags'],
+
+    // This prevents just matching *everything* tagged "john" if you
+    // only search "john", but it actually supports matching more than
+    // *two* tags at once: "john rose lowas" works! This is thanks to
+    // flexsearch matching multiple field values in a single query.
+    ['artTags', 'artTags'],
+
+    ['contributors', 'parentName'],
+    ['contributors', 'groups'],
+    ['primaryName', 'contributors'],
+    ['primaryName'],
+  ];
+
+  const interestingFields =
+    unique(interestingFieldCombinations.flat());
+
+  const {genericTerms, queriedKind} =
+    processTerms(query);
+
+  const particles =
+    particulate(genericTerms);
+
+  const groupedParticles =
+    groupArray(particles, ({length}) => length);
+
+  const queriesBy = keys =>
+    (groupedParticles.get(keys.length) ?? [])
+      .flatMap(permutations)
+      .map(values => values.map(({terms}) => terms.join(' ')))
+      .map(values =>
+        stitchArrays({
+          field: keys,
+          query: values,
+        }));
+
+  const boilerplate = queryBoilerplate(index);
+
+  const particleResults =
+    Object.fromEntries(
+      interestingFields.map(field => [
+        field,
+        Object.fromEntries(
+          particles.flat()
+            .map(({terms}) => terms.join(' '))
+            .map(query => [
+              query,
+              new Set(
+                boilerplate
+                  .query(query, {
+                    ...options,
+                    field,
+                    limit: Infinity,
+                  })
+                  .fieldResults[field]),
+            ])),
+      ]));
+
+  const results = new Set();
+
+  for (const interestingFieldCombination of interestingFieldCombinations) {
+    for (const query of queriesBy(interestingFieldCombination)) {
+      const idToMatchingFieldsMap = new Map();
+      for (const {field, query: fieldQuery} of query) {
+        for (const id of particleResults[field][fieldQuery]) {
+          if (idToMatchingFieldsMap.has(id)) {
+            idToMatchingFieldsMap.get(id).push(field);
+          } else {
+            idToMatchingFieldsMap.set(id, [field]);
+          }
+        }
+      }
+
+      const commonAcrossFields =
+        Array.from(idToMatchingFieldsMap.entries())
+          .filter(([id, matchingFields]) =>
+            matchingFields.length === interestingFieldCombination.length)
+          .map(([id]) => id);
+
+      for (const result of commonAcrossFields) {
+        results.add(result);
+      }
+    }
+  }
+
+  const constituted =
+    boilerplate.constitute(results);
+
+  const constitutedAndFiltered =
+    constituted
+      .filter(({id}) =>
+        (queriedKind
+          ? id.split(':')[0] === queriedKind
+          : true));
+
+  return constitutedAndFiltered;
+}
+
+function processTerms(query) {
+  const kindTermSpec = [
+    {kind: 'album', terms: ['album']},
+    {kind: 'artist', terms: ['artist']},
+    {kind: 'flash', terms: ['flash']},
+    {kind: 'group', terms: ['group']},
+    {kind: 'tag', terms: ['art tag', 'tag']},
+    {kind: 'track', terms: ['track']},
+  ];
+
+  const genericTerms = [];
+  let queriedKind = null;
+
+  const termRegexp =
+    new RegExp(
+      String.raw`(?<kind>${kindTermSpec.flatMap(spec => spec.terms).join('|')})` +
+      String.raw`|\S+`,
+      'gi');
+
+  for (const match of query.matchAll(termRegexp)) {
+    const {groups} = match;
+
+    if (groups.kind && !queriedKind) {
+      queriedKind =
+        kindTermSpec
+          .find(({terms}) => terms.includes(groups.kind.toLowerCase()))
+          .kind;
+
+      continue;
+    }
+
+    genericTerms.push(match[0]);
+  }
+
+  return {genericTerms, queriedKind};
+}
+
+function particulate(terms) {
+  if (empty(terms)) return [];
+
+  const results = [];
+
+  for (let slice = 1; slice <= 2; slice++) {
+    if (slice === terms.length) {
+      break;
+    }
+
+    const front = terms.slice(0, slice);
+    const back = terms.slice(slice);
+
+    results.push(...
+      particulate(back)
+        .map(result => [
+          {terms: front},
+          ...result
+        ]));
+  }
+
+  results.push([{terms}]);
+
+  return results;
+}
+
+// This function doesn't even come close to "performant",
+// but it only operates on small data here.
+function permutations(array) {
+  switch (array.length) {
+    case 0:
+      return [];
+
+    case 1:
+      return [array];
+
+    default:
+      return array.flatMap((item, index) => {
+        const behind = array.slice(0, index);
+        const ahead = array.slice(index + 1);
+        return (
+          permutations([...behind, ...ahead])
+            .map(rest => [item, ...rest]));
+      });
+  }
+}
+
+function queryBoilerplate(index) {
+  const idToDoc = {};
+
+  return {
+    idToDoc,
+
+    constitute: (ids) =>
+      Array.from(ids)
+        .map(id => ({id, doc: idToDoc[id]})),
+
+    query: (query, options) => {
+      const rawResults =
+        index.search(query, options);
+
+      const fieldResults =
+        Object.fromEntries(
+          rawResults
+            .map(({field, result}) => [
+              field,
+              result.map(result =>
+                (typeof result === 'string'
+                  ? result
+                  : result.id)),
+            ]));
+
+      Object.assign(
+        idToDoc,
+        Object.fromEntries(
+          rawResults
+            .flatMap(({result}) => result)
+            .map(({id, doc}) => [id, doc])));
+
+      return {rawResults, fieldResults};
+    },
+  };
+}