From 3b2f391d291e37e0c39a3fc4c7a406c4b6da12b1 Mon Sep 17 00:00:00 2001
From: Gio
Date: Mon, 25 Mar 2024 20:47:21 -0500
Subject: upd8: Search implementation

---
Review revisions relative to the original commit:
  - search.js: await the writeFile() call, so writeSearchIndex() only
    resolves after the index file is written and a failed write rejects
    the build step instead of becoming an unhandled rejection.
  - search.js: index track artists as one flat list of names and aliases
    (like the artist index) instead of an array of nested arrays.
  - upd8.js: terminate the new import with a semicolon, like its neighbours.
  - Added doc comments to writeSearchIndex, initSearch and searchAll.
  - Whitespace was reconstructed; "index" blob ids are from the original.

 package-lock.json          |  11 +++++
 package.json               |   4 ++-
 src/data/things/search.js  | 101 +++++++++++++++++++++++++++++++++++++++++++++
 src/static/clientSearch.js |  53 ++++++++++++++++++++++++
 src/upd8.js                |  36 ++++++++++++++++
 5 files changed, 204 insertions(+), 1 deletion(-)
 create mode 100644 src/data/things/search.js
 create mode 100644 src/static/clientSearch.js

diff --git a/package-lock.json b/package-lock.json
index 38e0324d..8caa8aaa 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -13,6 +13,7 @@
         "chroma-js": "^2.4.2",
         "command-exists": "^1.2.9",
         "eslint": "^8.37.0",
+        "flexsearch": "^0.7.43",
         "he": "^1.2.0",
         "image-size": "^1.0.2",
         "js-yaml": "^4.1.0",
@@ -2318,6 +2319,11 @@
       "resolved": "https://registry.npmjs.org/flatted/-/flatted-3.2.5.tgz",
       "integrity": "sha512-WIWGi2L3DyTUvUrwRKgGi9TwxQMUEqPOPQBVi71R96jZXJdFskXEmf54BoZaS1kknGODoIGASGEzBUYdyMCBJg=="
     },
+    "node_modules/flexsearch": {
+      "version": "0.7.43",
+      "resolved": "https://registry.npmjs.org/flexsearch/-/flexsearch-0.7.43.tgz",
+      "integrity": "sha512-c5o/+Um8aqCSOXGcZoqZOm+NqtVwNsvVpWv6lfmSclU954O3wvQKxxK8zj74fPaSJbXpSLTs4PRhh+wnoCXnKg=="
+    },
     "node_modules/foreground-child": {
       "version": "3.1.1",
       "resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-3.1.1.tgz",
@@ -6990,6 +6996,11 @@
       "resolved": "https://registry.npmjs.org/flatted/-/flatted-3.2.5.tgz",
       "integrity": "sha512-WIWGi2L3DyTUvUrwRKgGi9TwxQMUEqPOPQBVi71R96jZXJdFskXEmf54BoZaS1kknGODoIGASGEzBUYdyMCBJg=="
     },
+    "flexsearch": {
+      "version": "0.7.43",
+      "resolved": "https://registry.npmjs.org/flexsearch/-/flexsearch-0.7.43.tgz",
+      "integrity": "sha512-c5o/+Um8aqCSOXGcZoqZOm+NqtVwNsvVpWv6lfmSclU954O3wvQKxxK8zj74fPaSJbXpSLTs4PRhh+wnoCXnKg=="
+    },
     "foreground-child": {
       "version": "3.1.1",
       "resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-3.1.1.tgz",
diff --git a/package.json b/package.json
index 88f351bc..d40d9295 100644
--- a/package.json
+++ b/package.json
@@ -38,9 +38,10 @@
     "#node-utils": "./src/util/node-utils.js",
     "#repl": "./src/write/build-modes/repl.js",
     "#replacer": "./src/util/replacer.js",
+    "#search": "./src/data/things/search.js",
     "#serialize": "./src/data/serialize.js",
-    "#sugar": "./src/util/sugar.js",
     "#sort": "./src/util/sort.js",
+    "#sugar": "./src/util/sugar.js",
     "#test-lib": "./test/lib/index.js",
     "#thing": "./src/data/thing.js",
     "#things": "./src/data/things/index.js",
@@ -59,6 +60,7 @@
     "chroma-js": "^2.4.2",
     "command-exists": "^1.2.9",
     "eslint": "^8.37.0",
+    "flexsearch": "^0.7.43",
     "he": "^1.2.0",
     "image-size": "^1.0.2",
     "js-yaml": "^4.1.0",
diff --git a/src/data/things/search.js b/src/data/things/search.js
new file mode 100644
index 00000000..df177071
--- /dev/null
+++ b/src/data/things/search.js
@@ -0,0 +1,101 @@
+#!/usr/bin/env node
+
+'use strict';
+
+import {
+  writeFile,
+} from 'node:fs/promises';
+
+import {
+  logWarn,
+  logInfo,
+  logError,
+} from '#cli';
+
+import Thing from '#thing';
+
+import FlexSearch from 'flexsearch';
+
+/**
+ * Builds FlexSearch document indexes for albums, tracks, and artists from
+ * wikiData and writes their exported contents to a single JSON file.
+ *
+ * @param {string} search_index_path - Where to write the JSON index file.
+ * @param {object} wikiData - Wiki data; albumData and artistData are read.
+ * @returns {Promise<void>} Settles once the index file has been written.
+ */
+export async function writeSearchIndex(search_index_path, wikiData) {
+
+  // Basic flow is:
+  // 1. Define schema for type
+  // 2. Add documents to index
+  // 3. Save index to exportable json
+
+  // Copy this block directly into clientSearch.js
+  const indexes = {
+    albums: new FlexSearch.Document({
+      id: "reference",
+      index: ["name", "groups"],
+    }),
+    tracks: new FlexSearch.Document({
+      id: "reference",
+      index: ["track", "album", "artists", "directory", "additionalNames"],
+    }),
+    artists: new FlexSearch.Document({
+      id: "reference",
+      index: ["names"],
+    })
+  }
+
+  wikiData.albumData.forEach((album) => {
+    indexes.albums.add({
+      reference: Thing.getReference(album),
+      name: album.name,
+      groups: album.groups.map(group => group.name),
+    })
+
+    album.tracks.forEach((track) => {
+      indexes.tracks.add({
+        reference: Thing.getReference(track),
+        album: album.name,
+        track: track.name,
+        // Flat list of names: credited artists first, then their aliases.
+        artists: [
+          ...track.artistContribs.map(contrib => contrib.artist.name),
+          ...track.artistContribs.flatMap(contrib => contrib.artist.aliasNames)
+        ],
+        additionalNames: track.additionalNames.map(entry => entry.name)
+      })
+    })
+  });
+
+  wikiData.artistData
+  .filter(artist => !artist.isAlias)
+  .forEach((artist) => {
+    indexes.artists.add({
+      reference: Thing.getReference(artist),
+      names: [
+        artist.name,
+        ...artist.aliasNames
+      ]
+    })
+  })
+
+  // Export indexes to json
+  let searchData = {}
+
+  await Promise.all(
+    Object.entries(indexes)
+    .map(pair => {
+      const [index_name, index] = pair
+      searchData[index_name] = {}
+      return index.export((key, data) => {
+        searchData[index_name][key] = data
+      });
+    })
+  )
+
+  // Awaited so the returned promise settles only after the file is written,
+  // and so a failed write rejects here instead of going unhandled.
+  await writeFile(search_index_path, JSON.stringify(searchData))
+}
diff --git a/src/static/clientSearch.js b/src/static/clientSearch.js
new file mode 100644
index 00000000..4d01cfd9
--- /dev/null
+++ b/src/static/clientSearch.js
@@ -0,0 +1,53 @@
+/* eslint-env browser */
+
+/**
+ * Creates the FlexSearch document indexes on window.indexes and fills them
+ * from the exported index data fetched from /media/search_index.json.
+ */
+async function initSearch() {
+  const FlexSearch = window.FlexSearch;
+
+  // Copied directly from server search.js
+  window.indexes = {
+    albums: new FlexSearch.Document({
+      id: "reference",
+      index: ["name", "groups"],
+    }),
+    tracks: new FlexSearch.Document({
+      id: "reference",
+      index: ["track", "album", "artists", "directory", "additionalNames"],
+    }),
+    artists: new FlexSearch.Document({
+      id: "reference",
+      index: ["names"],
+    })
+  }
+
+  let searchData = await fetch('/media/search_index.json').then(resp => resp.json())
+
+  Object.entries(searchData).forEach(key_index_pair => {
+    const [index_key, index_data] = key_index_pair
+    Object.entries(index_data).forEach(key_value_pair => {
+      const [key, value] = key_value_pair
+      window.indexes[index_key].import(key, value);
+    })
+  })
+}
+
+/**
+ * Runs one query against every index in window.indexes.
+ *
+ * @param {*} query - Query passed to each index's search().
+ * @param {object} [options] - Passed through to each index's search().
+ * @returns {object} Results of each search, keyed by index name.
+ */
+function searchAll(query, options) {
+  options = options || {}
+  return Object.entries(window.indexes).reduce((a, pair) => {
+    const [k, v] = pair
+    a[k] = v.search(query, options)
+    return a
+  }, {})
+}
+
+document.addEventListener('DOMContentLoaded', initSearch);
diff --git a/src/upd8.js b/src/upd8.js
index cdf39923..358bf47b 100755
--- a/src/upd8.js
+++ b/src/upd8.js
@@ -51,6 +51,8 @@ import {sortByName} from '#sort';
 import {generateURLs, urlSpec} from '#urls';
 import {identifyAllWebRoutes} from '#web-routes';
 
+import {writeSearchIndex} from '#search';
+
 import {
   colors,
   decorateTime,
@@ -143,6 +145,9 @@ async function main() {
       {...defaultStepStatus, name: `generate thumbnails`,
         for: ['thumbs']},
 
+    buildSearchIndex:
+      {...defaultStepStatus, name: `generate search index`},
+
     loadDataFiles:
       {...defaultStepStatus, name: `load and process data files`,
         for: ['build']},
@@ -356,6 +361,11 @@ async function main() {
       type: 'flag',
     },
 
+    'skip-search': {
+      help: `Skip creation of the text search file`,
+      type: 'flag',
+    },
+
     // Just working on data entries and not interested in actually
     // generating site HTML yet? This flag will cut execution off right
     // 8efore any site 8uilding actually happens.
@@ -760,6 +770,15 @@ async function main() {
     buildConfig: 'webRoutes',
   });
 
+  fallbackStep('buildSearchIndex', {
+    default: 'perform',
+    buildConfig: 'search',
+    cli: {
+      flag: 'skip-search',
+      negate: true,
+    },
+  });
+
   fallbackStep('verifyImagePaths', {
     default: 'perform',
     buildConfig: 'mediaValidation',
@@ -1456,6 +1475,23 @@ async function main() {
     });
   }
 
+  if (stepStatusSummary.buildSearchIndex.status === STATUS_NOT_STARTED) {
+    Object.assign(stepStatusSummary.buildSearchIndex, {
+      status: STATUS_STARTED_NOT_DONE,
+      timeStart: Date.now(),
+    });
+
+    const search_index_path = path.join(mediaPath, "search_index.json")
+    logInfo(`Search index: ${search_index_path}`)
+
+    await writeSearchIndex(search_index_path, wikiData)
+
+    Object.assign(stepStatusSummary.buildSearchIndex, {
+      status: STATUS_DONE_CLEAN,
+      timeEnd: Date.now(),
+    });
+  }
+
   // Filter out any things with duplicate directories throughout the data,
   // warning about them too.
 
-- 
cgit 1.3.0-6-gf8a5