From 3b2f391d291e37e0c39a3fc4c7a406c4b6da12b1 Mon Sep 17 00:00:00 2001
From: Gio <sethg@ipi.org>
Date: Mon, 25 Mar 2024 20:47:21 -0500
Subject: upd8: Search implementation

---
 src/data/things/search.js  | 90 ++++++++++++++++++++++++++++++++++++++++++++++
 src/static/clientSearch.js | 42 ++++++++++++++++++++++
 src/upd8.js                | 36 +++++++++++++++++++
 3 files changed, 168 insertions(+)
 create mode 100644 src/data/things/search.js
 create mode 100644 src/static/clientSearch.js

(limited to 'src')

diff --git a/src/data/things/search.js b/src/data/things/search.js
new file mode 100644
index 00000000..df177071
--- /dev/null
+++ b/src/data/things/search.js
@@ -0,0 +1,90 @@
+#!/usr/bin/env node
+
+'use strict';
+
+import {
+  writeFile,
+} from 'node:fs/promises';
+
+import {
+  logWarn,
+  logInfo,
+  logError,
+} from '#cli';
+
+import Thing from '#thing';
+
+import FlexSearch from 'flexsearch';
+
+/**
+ * Builds FlexSearch document indexes for albums, tracks and artists from
+ * wikiData, then writes every exported index segment to one JSON file.
+ *
+ * @param {string} search_index_path - Path of the JSON file to write.
+ * @param {object} wikiData - Reads `albumData` and `artistData` from this.
+ * @returns {Promise<void>} Settles once the file write has finished.
+ */
+export async function writeSearchIndex(search_index_path, wikiData) {
+  // Copy this block directly into clientSearch.js
+  const indexes = {
+    albums: new FlexSearch.Document({
+      id: "reference",
+      index: ["name", "groups"],
+    }),
+    tracks: new FlexSearch.Document({
+      id: "reference",
+      index: ["track", "album", "artists", "directory", "additionalNames"],
+    }),
+    artists: new FlexSearch.Document({
+      id: "reference",
+      index: ["names"],
+    }),
+  };
+
+  for (const album of wikiData.albumData) {
+    indexes.albums.add({
+      reference: Thing.getReference(album),
+      name: album.name,
+      groups: album.groups.map(group => group.name),
+    });
+
+    for (const track of album.tracks) {
+      const artists = track.artistContribs.map(contrib => contrib.artist);
+      indexes.tracks.add({
+        reference: Thing.getReference(track),
+        album: album.name,
+        track: track.name,
+        // One flat list of strings: every artist name, then every alias.
+        artists: [
+          ...artists.map(artist => artist.name),
+          ...artists.flatMap(artist => artist.aliasNames),
+        ],
+        additionalNames: track.additionalNames.map(entry => entry.name),
+      });
+    }
+  }
+
+  for (const artist of wikiData.artistData) {
+    // Skip alias entries; alias names are indexed through aliasNames below.
+    if (artist.isAlias) continue;
+
+    indexes.artists.add({
+      reference: Thing.getReference(artist),
+      names: [artist.name, ...artist.aliasNames],
+    });
+  }
+
+  // Export indexes to json: searchData[index_name][key] = exported data
+  const searchData = {};
+
+  await Promise.all(
+    Object.entries(indexes).map(([index_name, index]) => {
+      searchData[index_name] = {};
+      return index.export((key, data) => {
+        searchData[index_name][key] = data;
+      });
+    }));
+
+  // Awaited so callers only continue (and see errors) after the write.
+  await writeFile(search_index_path, JSON.stringify(searchData));
+}
diff --git a/src/static/clientSearch.js b/src/static/clientSearch.js
new file mode 100644
index 00000000..4d01cfd9
--- /dev/null
+++ b/src/static/clientSearch.js
@@ -0,0 +1,42 @@
+/* eslint-env browser */
+
+/**
+ * Creates the FlexSearch document indexes on `window.indexes` and fills
+ * them from the exported segments in `/media/search_index.json`.
+ *
+ * @returns {Promise<void>}
+ * @throws {Error} If the index file request does not succeed.
+ */
+async function initSearch() {
+  const FlexSearch = window.FlexSearch;
+
+  // Must match the index schema defined in the server's search.js
+  window.indexes = {
+    albums: new FlexSearch.Document({id: "reference", index: ["name", "groups"]}),
+    tracks: new FlexSearch.Document({id: "reference",
+      index: ["track", "album", "artists", "directory", "additionalNames"]}),
+    artists: new FlexSearch.Document({id: "reference", index: ["names"]}),
+  };
+
+  const resp = await fetch('/media/search_index.json');
+  if (!resp.ok) {
+    throw new Error(`Could not load search index: HTTP ${resp.status}`);
+  }
+  const searchData = await resp.json();
+  for (const [index_key, index_data] of Object.entries(searchData)) {
+    for (const [key, value] of Object.entries(index_data)) {
+      window.indexes[index_key].import(key, value);
+    }
+  }
+}
+
+/** Runs `query` on every index in `window.indexes`; `options` may be omitted. */
+function searchAll(query, options) {
+  const searchOptions = options || {};
+
+  const perIndex = Object.entries(window.indexes).map(
+    ([name, index]) => [name, index.search(query, searchOptions)]);
+  return Object.fromEntries(perIndex);
+}
+// Run initSearch when the document's DOMContentLoaded event fires.
+document.addEventListener('DOMContentLoaded', initSearch);
diff --git a/src/upd8.js b/src/upd8.js
index cdf39923..358bf47b 100755
--- a/src/upd8.js
+++ b/src/upd8.js
@@ -51,6 +51,8 @@ import {sortByName} from '#sort';
 import {generateURLs, urlSpec} from '#urls';
 import {identifyAllWebRoutes} from '#web-routes';
 
+import {writeSearchIndex} from '#search'
+
 import {
   colors,
   decorateTime,
@@ -143,6 +145,9 @@ async function main() {
       {...defaultStepStatus, name: `generate thumbnails`,
         for: ['thumbs']},
 
+    buildSearchIndex:
+      {...defaultStepStatus, name: `generate search index`},
+
     loadDataFiles:
       {...defaultStepStatus, name: `load and process data files`,
         for: ['build']},
@@ -356,6 +361,11 @@ async function main() {
       type: 'flag',
     },
 
+    'skip-search': {
+      help: `Skip creation of the text search file`,
+      type: 'flag',
+    },
+
     // Just working on data entries and not interested in actually
     // generating site HTML yet? This flag will cut execution off right
     // 8efore any site 8uilding actually happens.
@@ -760,6 +770,15 @@ async function main() {
       buildConfig: 'webRoutes',
     });
 
+    fallbackStep('buildSearchIndex', {
+      default: 'perform',
+      buildConfig: 'search',
+      cli: {
+        flag: 'skip-search',
+        negate: true,
+      },
+    });
+
     fallbackStep('verifyImagePaths', {
       default: 'perform',
       buildConfig: 'mediaValidation',
@@ -1456,6 +1475,23 @@ async function main() {
     });
   }
 
+  if (stepStatusSummary.buildSearchIndex.status === STATUS_NOT_STARTED) {
+    Object.assign(stepStatusSummary.buildSearchIndex, {
+      status: STATUS_STARTED_NOT_DONE,
+      timeStart: Date.now(),
+    });
+
+    const search_index_path = path.join(mediaPath, "search_index.json")
+    logInfo(`Search index: ${search_index_path}`)
+
+    await writeSearchIndex(search_index_path, wikiData)
+
+    Object.assign(stepStatusSummary.buildSearchIndex, {
+      status: STATUS_DONE_CLEAN,
+      timeEnd: Date.now(),
+    });
+  }
+
   // Filter out any things with duplicate directories throughout the data,
   // warning about them too.
 
-- 
cgit 1.3.0-6-gf8a5