From 30914eb56468b388e4b5cb2090292c5932171eb3 Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Mon, 13 May 2024 14:03:03 -0300 Subject: search, client: use json-compress and msgpackr for search indexes --- package-lock.json | 201 +++++++++++++++++++++++++++++++++++++++-- package.json | 2 + src/search.js | 29 +++--- src/static/js/search-worker.js | 44 ++++++--- src/web-routes.js | 10 ++ 5 files changed, 258 insertions(+), 28 deletions(-) diff --git a/package-lock.json b/package-lock.json index 8caa8aaa..3b85faa9 100644 --- a/package-lock.json +++ b/package-lock.json @@ -12,12 +12,14 @@ "@js-temporal/polyfill": "^0.4.4", "chroma-js": "^2.4.2", "command-exists": "^1.2.9", + "compress-json": "^3.0.5", "eslint": "^8.37.0", "flexsearch": "^0.7.43", "he": "^1.2.0", "image-size": "^1.0.2", "js-yaml": "^4.1.0", "marked": "^10.0.0", + "msgpackr": "^1.10.2", "striptags": "^4.0.0-alpha.4", "word-wrap": "^1.2.3" }, @@ -303,6 +305,78 @@ "node": ">=12" } }, + "node_modules/@msgpackr-extract/msgpackr-extract-darwin-arm64": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/@msgpackr-extract/msgpackr-extract-darwin-arm64/-/msgpackr-extract-darwin-arm64-3.0.2.tgz", + "integrity": "sha512-9bfjwDxIDWmmOKusUcqdS4Rw+SETlp9Dy39Xui9BEGEk19dDwH0jhipwFzEff/pFg95NKymc6TOTbRKcWeRqyQ==", + "cpu": [ + "arm64" + ], + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@msgpackr-extract/msgpackr-extract-darwin-x64": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/@msgpackr-extract/msgpackr-extract-darwin-x64/-/msgpackr-extract-darwin-x64-3.0.2.tgz", + "integrity": "sha512-lwriRAHm1Yg4iDf23Oxm9n/t5Zpw1lVnxYU3HnJPTi2lJRkKTrps1KVgvL6m7WvmhYVt/FIsssWay+k45QHeuw==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "darwin" + ] + }, + "node_modules/@msgpackr-extract/msgpackr-extract-linux-arm": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/@msgpackr-extract/msgpackr-extract-linux-arm/-/msgpackr-extract-linux-arm-3.0.2.tgz", + "integrity": "sha512-MOI9Dlfrpi2Cuc7i5dXdxPbFIgbDBGgKR5F2yWEa6FVEtSWncfVNKW5AKjImAQ6CZlBK9tympdsZJ2xThBiWWA==", + "cpu": [ + "arm" + ], + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@msgpackr-extract/msgpackr-extract-linux-arm64": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/@msgpackr-extract/msgpackr-extract-linux-arm64/-/msgpackr-extract-linux-arm64-3.0.2.tgz", + "integrity": "sha512-FU20Bo66/f7He9Fp9sP2zaJ1Q8L9uLPZQDub/WlUip78JlPeMbVL8546HbZfcW9LNciEXc8d+tThSJjSC+tmsg==", + "cpu": [ + "arm64" + ], + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@msgpackr-extract/msgpackr-extract-linux-x64": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/@msgpackr-extract/msgpackr-extract-linux-x64/-/msgpackr-extract-linux-x64-3.0.2.tgz", + "integrity": "sha512-gsWNDCklNy7Ajk0vBBf9jEx04RUxuDQfBse918Ww+Qb9HCPoGzS+XJTLe96iN3BVK7grnLiYghP/M4L8VsaHeA==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "linux" + ] + }, + "node_modules/@msgpackr-extract/msgpackr-extract-win32-x64": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/@msgpackr-extract/msgpackr-extract-win32-x64/-/msgpackr-extract-win32-x64-3.0.2.tgz", + "integrity": "sha512-O+6Gs8UeDbyFpbSh2CPEz/UOrrdWPTBYNblZK5CxxLisYt4kGX3Sc+czffFonyjiGSq3jWLwJS/CCJc7tBr4sQ==", + "cpu": [ + "x64" + ], + "optional": true, + "os": [ + "win32" + ] + }, "node_modules/@nodelib/fs.scandir": { "version": "2.1.5", "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", @@ -1962,6 +2036,11 @@ "resolved": "https://registry.npmjs.org/command-exists/-/command-exists-1.2.9.tgz", "integrity": "sha512-LTQ/SGc+s0Xc0Fu5WaKnR0YiygZkm9eKFvyS+fRsU7/ZWFF8ykFM6Pc9aCVf1+xasOOZpO3BAVgVrKvsqKHV7w==" }, + "node_modules/compress-json": { + "version": "3.0.5", + "resolved": "https://registry.npmjs.org/compress-json/-/compress-json-3.0.5.tgz", + "integrity": "sha512-HYiJvE0cTIygI9zXqY5fkRr7H3NV3UAME0enzwN5M0JkzMOtUcjSyaH7HxVRzXsn7IIXD0STA9M5jyWkxERSLg==" + }, "node_modules/concat-map": { "version": "0.0.1", "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", @@ -2074,9 +2153,9 @@ "dev": true }, "node_modules/escalade": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.1.tgz", - "integrity": "sha512-k0er2gUkLf8O0zKJiAhmkTnJlTvINGv7ygDNPbeIsX/TJjGJZHuh9B2UxbsaEkmlEo9MfhrSzmhIlhRlI2GXnw==", + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.2.tgz", + "integrity": "sha512-ErCHMCae19vR8vQGe50xIsVomy19rg6gFu3+r3jkEO46suLMWBksvVyoGgQV+jOfl84ZSOSlmv6Gxa89PmTGmA==", "dev": true, "engines": { "node": ">=6" @@ -3306,6 +3385,35 @@ "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==" }, + "node_modules/msgpackr": { + "version": "1.10.2", + "resolved": "https://registry.npmjs.org/msgpackr/-/msgpackr-1.10.2.tgz", + "integrity": "sha512-L60rsPynBvNE+8BWipKKZ9jHcSGbtyJYIwjRq0VrIvQ08cRjntGXJYW/tmciZ2IHWIY8WEW32Qa2xbh5+SKBZA==", + "optionalDependencies": { + "msgpackr-extract": "^3.0.2" + } + }, + "node_modules/msgpackr-extract": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/msgpackr-extract/-/msgpackr-extract-3.0.2.tgz", + "integrity": "sha512-SdzXp4kD/Qf8agZ9+iTu6eql0m3kWm1A2y1hkpTeVNENutaB0BwHlSvAIaMxwntmRUAUjon2V4L8Z/njd0Ct8A==", + "hasInstallScript": true, + "optional": true, + "dependencies": { + "node-gyp-build-optional-packages": "5.0.7" + }, + "bin": { + "download-msgpackr-prebuilds": "bin/download-prebuilds.js" + }, + "optionalDependencies": { + "@msgpackr-extract/msgpackr-extract-darwin-arm64": "3.0.2", + "@msgpackr-extract/msgpackr-extract-darwin-x64": "3.0.2", + "@msgpackr-extract/msgpackr-extract-linux-arm": "3.0.2", + "@msgpackr-extract/msgpackr-extract-linux-arm64": "3.0.2", + "@msgpackr-extract/msgpackr-extract-linux-x64": "3.0.2", + "@msgpackr-extract/msgpackr-extract-win32-x64": "3.0.2" + } + }, "node_modules/natural-compare": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/natural-compare/-/natural-compare-1.4.0.tgz", @@ -3344,6 +3452,17 @@ "node": "^16.14.0 || >=18.0.0" } }, + "node_modules/node-gyp-build-optional-packages": { + "version": "5.0.7", + "resolved": "https://registry.npmjs.org/node-gyp-build-optional-packages/-/node-gyp-build-optional-packages-5.0.7.tgz", + "integrity": "sha512-YlCCc6Wffkx0kHkmam79GKvDQ6x+QZkMjFGrIMxgFNILFvGSbCp2fCBC55pGTT9gVaz8Na5CLmxt/urtzRv36w==", + "optional": true, + "bin": { + "node-gyp-build-optional-packages": "bin.js", + "node-gyp-build-optional-packages-optional": "optional.js", + "node-gyp-build-optional-packages-test": "build-test.js" + } + }, "node_modules/node-gyp/node_modules/brace-expansion": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz", @@ -5564,6 +5683,42 @@ "tslib": "^2.4.1" } }, + "@msgpackr-extract/msgpackr-extract-darwin-arm64": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/@msgpackr-extract/msgpackr-extract-darwin-arm64/-/msgpackr-extract-darwin-arm64-3.0.2.tgz", + "integrity": "sha512-9bfjwDxIDWmmOKusUcqdS4Rw+SETlp9Dy39Xui9BEGEk19dDwH0jhipwFzEff/pFg95NKymc6TOTbRKcWeRqyQ==", + "optional": true + }, + "@msgpackr-extract/msgpackr-extract-darwin-x64": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/@msgpackr-extract/msgpackr-extract-darwin-x64/-/msgpackr-extract-darwin-x64-3.0.2.tgz", + "integrity": "sha512-lwriRAHm1Yg4iDf23Oxm9n/t5Zpw1lVnxYU3HnJPTi2lJRkKTrps1KVgvL6m7WvmhYVt/FIsssWay+k45QHeuw==", + "optional": true + }, + "@msgpackr-extract/msgpackr-extract-linux-arm": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/@msgpackr-extract/msgpackr-extract-linux-arm/-/msgpackr-extract-linux-arm-3.0.2.tgz", + "integrity": "sha512-MOI9Dlfrpi2Cuc7i5dXdxPbFIgbDBGgKR5F2yWEa6FVEtSWncfVNKW5AKjImAQ6CZlBK9tympdsZJ2xThBiWWA==", + "optional": true + }, + "@msgpackr-extract/msgpackr-extract-linux-arm64": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/@msgpackr-extract/msgpackr-extract-linux-arm64/-/msgpackr-extract-linux-arm64-3.0.2.tgz", + "integrity": "sha512-FU20Bo66/f7He9Fp9sP2zaJ1Q8L9uLPZQDub/WlUip78JlPeMbVL8546HbZfcW9LNciEXc8d+tThSJjSC+tmsg==", + "optional": true + }, + "@msgpackr-extract/msgpackr-extract-linux-x64": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/@msgpackr-extract/msgpackr-extract-linux-x64/-/msgpackr-extract-linux-x64-3.0.2.tgz", + "integrity": "sha512-gsWNDCklNy7Ajk0vBBf9jEx04RUxuDQfBse918Ww+Qb9HCPoGzS+XJTLe96iN3BVK7grnLiYghP/M4L8VsaHeA==", + "optional": true + }, + "@msgpackr-extract/msgpackr-extract-win32-x64": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/@msgpackr-extract/msgpackr-extract-win32-x64/-/msgpackr-extract-win32-x64-3.0.2.tgz", + "integrity": "sha512-O+6Gs8UeDbyFpbSh2CPEz/UOrrdWPTBYNblZK5CxxLisYt4kGX3Sc+czffFonyjiGSq3jWLwJS/CCJc7tBr4sQ==", + "optional": true + }, "@nodelib/fs.scandir": { "version": "2.1.5", "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", @@ -6725,6 +6880,11 @@ "resolved": "https://registry.npmjs.org/command-exists/-/command-exists-1.2.9.tgz", "integrity": "sha512-LTQ/SGc+s0Xc0Fu5WaKnR0YiygZkm9eKFvyS+fRsU7/ZWFF8ykFM6Pc9aCVf1+xasOOZpO3BAVgVrKvsqKHV7w==" }, + "compress-json": { + "version": "3.0.5", + "resolved": "https://registry.npmjs.org/compress-json/-/compress-json-3.0.5.tgz", + "integrity": "sha512-HYiJvE0cTIygI9zXqY5fkRr7H3NV3UAME0enzwN5M0JkzMOtUcjSyaH7HxVRzXsn7IIXD0STA9M5jyWkxERSLg==" + }, "concat-map": { "version": "0.0.1", "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", @@ -6814,9 +6974,9 @@ "dev": true }, "escalade": { - "version": "3.1.1", - "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.1.tgz", - "integrity": "sha512-k0er2gUkLf8O0zKJiAhmkTnJlTvINGv7ygDNPbeIsX/TJjGJZHuh9B2UxbsaEkmlEo9MfhrSzmhIlhRlI2GXnw==", + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/escalade/-/escalade-3.1.2.tgz", + "integrity": "sha512-ErCHMCae19vR8vQGe50xIsVomy19rg6gFu3+r3jkEO46suLMWBksvVyoGgQV+jOfl84ZSOSlmv6Gxa89PmTGmA==", "dev": true }, "escape-string-regexp": { @@ -7721,6 +7881,29 @@ "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==" }, + "msgpackr": { + "version": "1.10.2", + "resolved": "https://registry.npmjs.org/msgpackr/-/msgpackr-1.10.2.tgz", + "integrity": "sha512-L60rsPynBvNE+8BWipKKZ9jHcSGbtyJYIwjRq0VrIvQ08cRjntGXJYW/tmciZ2IHWIY8WEW32Qa2xbh5+SKBZA==", + "requires": { + "msgpackr-extract": "^3.0.2" + } + }, + "msgpackr-extract": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/msgpackr-extract/-/msgpackr-extract-3.0.2.tgz", + "integrity": "sha512-SdzXp4kD/Qf8agZ9+iTu6eql0m3kWm1A2y1hkpTeVNENutaB0BwHlSvAIaMxwntmRUAUjon2V4L8Z/njd0Ct8A==", + "optional": true, + "requires": { + "@msgpackr-extract/msgpackr-extract-darwin-arm64": "3.0.2", + "@msgpackr-extract/msgpackr-extract-darwin-x64": "3.0.2", + "@msgpackr-extract/msgpackr-extract-linux-arm": "3.0.2", + "@msgpackr-extract/msgpackr-extract-linux-arm64": "3.0.2", + "@msgpackr-extract/msgpackr-extract-linux-x64": "3.0.2", + "@msgpackr-extract/msgpackr-extract-win32-x64": "3.0.2", + "node-gyp-build-optional-packages": "5.0.7" + } + }, "natural-compare": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/natural-compare/-/natural-compare-1.4.0.tgz", @@ -7798,6 +7981,12 @@ } } }, + "node-gyp-build-optional-packages": { + "version": "5.0.7", + "resolved": "https://registry.npmjs.org/node-gyp-build-optional-packages/-/node-gyp-build-optional-packages-5.0.7.tgz", + "integrity": "sha512-YlCCc6Wffkx0kHkmam79GKvDQ6x+QZkMjFGrIMxgFNILFvGSbCp2fCBC55pGTT9gVaz8Na5CLmxt/urtzRv36w==", + "optional": true + }, "nopt": { "version": "7.2.0", "resolved": "https://registry.npmjs.org/nopt/-/nopt-7.2.0.tgz", diff --git a/package.json b/package.json index c8f96e98..fd8ac9cf 100644 --- a/package.json +++ b/package.json @@ -60,12 +60,14 @@ "@js-temporal/polyfill": "^0.4.4", "chroma-js": "^2.4.2", "command-exists": "^1.2.9", + "compress-json": "^3.0.5", "eslint": "^8.37.0", "flexsearch": "^0.7.43", "he": "^1.2.0", "image-size": "^1.0.2", "js-yaml": "^4.1.0", "marked": "^10.0.0", + "msgpackr": "^1.10.2", "striptags": "^4.0.0-alpha.4", "word-wrap": "^1.2.3" }, diff --git a/src/search.js b/src/search.js index 687aa215..a2dae9e1 100644 --- a/src/search.js +++ b/src/search.js @@ -4,7 +4,9 @@ import {createHash} from 'node:crypto'; import {mkdir, writeFile} from 'node:fs/promises'; import * as path from 'node:path'; +import {compress} from 'compress-json'; import FlexSearch from 'flexsearch'; +import {pack} from 'msgpackr'; import {logWarn} from '#cli'; import {makeSearchIndex, populateSearchIndex, searchSpec} from '#search-spec'; @@ -12,7 +14,7 @@ import {stitchArrays} from '#sugar'; import {checkIfImagePathHasCachedThumbnails, getThumbnailEqualOrSmaller} from '#thumbs'; -async function exportIndexToJSON(index) { +async function serializeIndex(index) { const results = {}; await index.export((key, data) => { @@ -29,7 +31,7 @@ async function exportIndexToJSON(index) { results[key] = JSON.parse(data); }); - return JSON.stringify(results); + return results; } export async function writeSearchData({ @@ -70,8 +72,13 @@ export async function writeSearchData({ wikiData, })); - const jsonIndexes = - await Promise.all(indexes.map(exportIndexToJSON)); + const serializedIndexes = + await Promise.all(indexes.map(serializeIndex)); + + const packedIndexes = + serializedIndexes + .map(data => compress(data)) + .map(data => pack(data)); const outputDirectory = path.join(wikiCachePath, 'search'); @@ -84,10 +91,10 @@ export async function writeSearchData({ Object.fromEntries( stitchArrays({ key: keys, - json: jsonIndexes, - }).map(({key, json}) => { + buffer: packedIndexes, + }).map(({key, buffer}) => { const md5 = createHash('md5'); - md5.write(json); + md5.write(buffer); const value = { md5: md5.digest('hex'), @@ -102,11 +109,11 @@ export async function writeSearchData({ await Promise.all( stitchArrays({ key: keys, - json: jsonIndexes, - }).map(({key, json}) => + buffer: packedIndexes, + }).map(({key, buffer}) => writeFile( - path.join(outputDirectory, key + '.json'), - json))); + path.join(outputDirectory, key + '.json.msgpack'), + buffer))); await writeFile(mainIndexFile, mainIndexJSON); } diff --git a/src/static/js/search-worker.js b/src/static/js/search-worker.js index c3975380..b8ab4a63 100644 --- a/src/static/js/search-worker.js +++ b/src/static/js/search-worker.js @@ -1,8 +1,14 @@ +import FlexSearch from '../lib/flexsearch/flexsearch.bundle.module.min.js'; + import {makeSearchIndex, searchSpec} from '../shared-util/search-spec.js'; import {empty, groupArray, stitchArrays, unique, withEntries} from '../shared-util/sugar.js'; -import FlexSearch from '../lib/flexsearch/flexsearch.bundle.module.min.js'; +import {loadDependency} from './module-import-shims.js'; + +// Will be loaded from dependencies. +let decompress; +let unpack; let status = null; let indexes = null; @@ -10,14 +16,27 @@ let indexes = null; onmessage = handleWindowMessage; postStatus('alive'); -main().then( - () => { - postStatus('ready'); - }, - error => { - console.error(`Search worker setup error:`, error); - postStatus('setup-error'); - }); +loadDependencies() + .then(main) + .then( + () => { + postStatus('ready'); + }, + error => { + console.error(`Search worker setup error:`, error); + postStatus('setup-error'); + }); + +async function loadDependencies() { + const {compressJSON} = + await loadDependency.fromWindow('../lib/compress-json/bundle.min.js'); + + const msgpackr = + await loadDependency.fromModuleExports('../lib/msgpackr/index.js'); + + ({decompress} = compressJSON); + ({unpack} = msgpackr); +} function rebase(path) { return `/search-data/` + path; @@ -38,8 +57,11 @@ async function main() { await Promise.all( Object.entries(indexData) .map(([key, _info]) => - fetch(rebase(key + '.json')) - .then(res => res.json()) + fetch(rebase(key + '.json.msgpack')) + .then(res => res.arrayBuffer()) + .then(buffer => new Uint8Array(buffer)) + .then(data => unpack(data)) + .then(data => decompress(data)) .then(data => { importIndex(key, data); }))); diff --git a/src/web-routes.js b/src/web-routes.js index 8bb2fba3..7e08d06f 100644 --- a/src/web-routes.js +++ b/src/web-routes.js @@ -59,9 +59,19 @@ export const dependencyRoutes = [ name: 'chroma-js', }), + quickNodeDependency({ + name: 'compress-json', + path: '..', // exit dist, access bundle.js + }), + quickNodeDependency({ name: 'flexsearch', }), + + quickNodeDependency({ + name: 'msgpackr', + path: 'dist', + }), ].flat(); export const allStaticWebRoutes = [ -- cgit 1.3.0-6-gf8a5