diff options
-rw-r--r-- | src/common-util/search-spec.js | 214 | ||||
-rw-r--r-- | src/static/js/client/sidebar-search.js | 36 | ||||
-rw-r--r-- | src/static/js/search-worker.js | 174 |
3 files changed, 272 insertions, 152 deletions
diff --git a/src/common-util/search-spec.js b/src/common-util/search-spec.js index 43d27846..af5ec201 100644 --- a/src/common-util/search-spec.js +++ b/src/common-util/search-spec.js @@ -85,104 +85,116 @@ function prepareArtwork(thing, { return serializeSrc; } -export const searchSpec = { - generic: { - query: ({ - albumData, - artTagData, - artistData, - flashData, - groupData, - trackData, - }) => [ - albumData, - - artTagData, - - artistData - .filter(artist => !artist.isAlias), - - flashData, - - groupData, - - trackData - // Exclude rereleases - there's no reasonable way to differentiate - // them from the main release as part of this query. - .filter(track => !track.mainReleaseTrack), - ].flat(), - - process(thing, opts) { - const fields = {}; - - fields.primaryName = - thing.name; - - const kind = - thing.constructor[Symbol.for('Thing.referenceType')]; - - fields.parentName = - (kind === 'track' - ? thing.album.name - : kind === 'group' - ? thing.category.name - : kind === 'flash' - ? thing.act.name - : null); - - fields.color = - thing.color; - - fields.artTags = - (thing.constructor.hasPropertyDescriptor('artTags') - ? thing.artTags.map(artTag => artTag.nameShort) - : []); - - fields.additionalNames = - (thing.constructor.hasPropertyDescriptor('additionalNames') - ? thing.additionalNames.map(entry => entry.name) - : thing.constructor.hasPropertyDescriptor('aliasNames') - ? thing.aliasNames - : []); - - const contribKeys = [ - 'artistContribs', - 'contributorContribs', - ]; +function baselineProcess(thing, opts) { + const fields = {}; + + fields.primaryName = + thing.name; - const contributions = - contribKeys - .filter(key => Object.hasOwn(thing, key)) - .flatMap(key => thing[key]); + fields.artwork = + prepareArtwork(thing, opts); + + fields.color = + thing.color; + + return fields; +} - fields.contributors = - contributions - .flatMap(({artist}) => [ - artist.name, - ...artist.aliasNames, - ]); +const baselineStore = [ + 'primaryName', + 'artwork', + 'color', +]; - const groups = - (Object.hasOwn(thing, 'groups') - ? thing.groups - : Object.hasOwn(thing, 'album') - ? thing.album.groups - : []); +function genericQuery(wikiData) { + return [ + wikiData.albumData, - const mainContributorNames = - contributions - .map(({artist}) => artist.name); + wikiData.artTagData, - fields.groups = - groups - .filter(group => !mainContributorNames.includes(group.name)) - .map(group => group.name); + wikiData.artistData + .filter(artist => !artist.isAlias), + + wikiData.flashData, + + wikiData.groupData, + + wikiData.trackData + // Exclude rereleases - there's no reasonable way to differentiate + // them from the main release as part of this query. + .filter(track => !track.mainReleaseTrack), + ].flat(); +} + +function genericProcess(thing, opts) { + const fields = baselineProcess(thing, opts); + + const kind = + thing.constructor[Symbol.for('Thing.referenceType')]; + + fields.parentName = + (kind === 'track' + ? thing.album.name + : kind === 'group' + ? thing.category.name + : kind === 'flash' + ? thing.act.name + : null); + + fields.artTags = + (thing.constructor.hasPropertyDescriptor('artTags') + ? thing.artTags.map(artTag => artTag.nameShort) + : []); + + fields.additionalNames = + (thing.constructor.hasPropertyDescriptor('additionalNames') + ? thing.additionalNames.map(entry => entry.name) + : thing.constructor.hasPropertyDescriptor('aliasNames') + ? thing.aliasNames + : []); + + const contribKeys = [ + 'artistContribs', + 'contributorContribs', + ]; + + const contributions = + contribKeys + .filter(key => Object.hasOwn(thing, key)) + .flatMap(key => thing[key]); + + fields.contributors = + contributions + .flatMap(({artist}) => [ + artist.name, + ...artist.aliasNames, + ]); + + const groups = + (Object.hasOwn(thing, 'groups') + ? thing.groups + : Object.hasOwn(thing, 'album') + ? thing.album.groups + : []); + + const mainContributorNames = + contributions + .map(({artist}) => artist.name); + + fields.groups = + groups + .filter(group => !mainContributorNames.includes(group.name)) + .map(group => group.name); + + return fields; +} - fields.artwork = - prepareArtwork(thing, opts); +const genericStore = baselineStore; - return fields; - }, +export const searchSpec = { + generic: { + query: genericQuery, + process: genericProcess, index: [ 'primaryName', @@ -191,13 +203,25 @@ export const searchSpec = { 'additionalNames', 'contributors', 'groups', - ], + ].map(field => ({field, tokenize: 'forward'})), - store: [ + store: genericStore, + }, + + verbatim: { + query: genericQuery, + process: genericProcess, + + index: [ 'primaryName', - 'artwork', - 'color', + 'parentName', + 'artTags', + 'additionalNames', + 'contributors', + 'groups', ], + + store: genericStore, }, }; diff --git a/src/static/js/client/sidebar-search.js b/src/static/js/client/sidebar-search.js index 42267a9a..b6008d28 100644 --- a/src/static/js/client/sidebar-search.js +++ b/src/static/js/client/sidebar-search.js @@ -772,7 +772,7 @@ function showSidebarSearchFailed() { function showSidebarSearchResults(results) { const {session} = info; - console.debug(`Showing search results:`, flattenResults(results)); + console.debug(`Showing search results:`, tidyResults(results)); showSearchSidebarColumn(); @@ -805,31 +805,27 @@ function showSidebarSearchResults(results) { restoreSidebarSearchResultsScrollOffset(); } -function flattenResults(results) { - const flatResults = - Object.entries(results) - .filter(([index]) => index === 'generic') - .flatMap(([index, results]) => results - .flatMap(({doc, id}) => ({ - index, - reference: id ?? null, - referenceType: (id ? id.split(':')[0] : null), - directory: (id ? id.split(':')[1] : null), - data: doc, - }))); - - return flatResults; +function tidyResults(results) { + const tidiedResults = + results.map(({doc, id}) => ({ + reference: id ?? null, + referenceType: (id ? id.split(':')[0] : null), + directory: (id ? id.split(':')[1] : null), + data: doc, + })); + + return tidiedResults; } function fillResultElements(results, { filterType = null, } = {}) { - const flatResults = flattenResults(results); + const tidiedResults = tidyResults(results); const filteredResults = (filterType - ? flatResults.filter(result => result.referenceType === filterType) - : flatResults); + ? tidiedResults.filter(result => result.referenceType === filterType) + : tidiedResults); while (info.results.firstChild) { info.results.firstChild.remove(); @@ -853,10 +849,10 @@ function fillResultElements(results, { } function showFilterElements(results) { - const flatResults = flattenResults(results); + const tidiedResults = tidyResults(results); const allReferenceTypes = - unique(flatResults.map(result => result.referenceType)); + unique(tidiedResults.map(result => result.referenceType)); let shownAny = false; diff --git a/src/static/js/search-worker.js b/src/static/js/search-worker.js index 1b4684ad..5ecb6eb4 100644 --- a/src/static/js/search-worker.js +++ b/src/static/js/search-worker.js @@ -371,56 +371,60 @@ function postActionResult(id, status, value) { } function performSearchAction({query, options}) { - const {generic, ...otherIndexes} = indexes; + const {generic, verbatim} = indexes; const genericResults = queryGenericIndex(generic, query, options); - const otherResults = - withEntries(otherIndexes, entries => entries - .map(([indexName, index]) => [ - indexName, - index.search(query, options), - ])); + const verbatimResults = + queryVerbatimIndex(verbatim, query, options); - return { - generic: genericResults, - ...otherResults, - }; + const verbatimIDs = + new Set(verbatimResults?.map(result => result.id)); + + const commonResults = + (verbatimResults && genericResults + ? genericResults + .filter(({id}) => verbatimIDs.has(id)) + : verbatimResults ?? genericResults); + + return commonResults; } -function queryGenericIndex(index, query, options) { - const interestingFieldCombinations = [ - ['primaryName', 'parentName', 'groups'], - ['primaryName', 'parentName'], - ['primaryName', 'groups', 'contributors'], - ['primaryName', 'groups', 'artTags'], - ['primaryName', 'groups'], - ['primaryName', 'contributors'], - ['primaryName', 'artTags'], - ['parentName', 'groups', 'artTags'], - ['parentName', 'artTags'], - ['groups', 'contributors'], - ['groups', 'artTags'], - - // This prevents just matching *everything* tagged "john" if you - // only search "john", but it actually supports matching more than - // *two* tags at once: "john rose lowas" works! This is thanks to - // flexsearch matching multiple field values in a single query. - ['artTags', 'artTags'], - - ['contributors', 'parentName'], - ['contributors', 'groups'], - ['primaryName', 'contributors'], - ['primaryName'], - ]; +const interestingFieldCombinations = [ + ['primaryName', 'parentName', 'groups'], + ['primaryName', 'parentName'], + ['primaryName', 'groups', 'contributors'], + ['primaryName', 'groups', 'artTags'], + ['primaryName', 'groups'], + ['primaryName', 'contributors'], + ['primaryName', 'artTags'], + ['parentName', 'groups', 'artTags'], + ['parentName', 'artTags'], + ['groups', 'contributors'], + ['groups', 'artTags'], + + // This prevents just matching *everything* tagged "john" if you + // only search "john", but it actually supports matching more than + // *two* tags at once: "john rose lowas" works! This is thanks to + // flexsearch matching multiple field values in a single query. + ['artTags', 'artTags'], + + ['contributors', 'parentName'], + ['contributors', 'groups'], + ['primaryName', 'contributors'], + ['primaryName'], +]; +function queryGenericIndex(index, query, options) { const interestingFields = unique(interestingFieldCombinations.flat()); const {genericTerms, queriedKind} = processTerms(query); + if (empty(genericTerms)) return null; + const particles = particulate(genericTerms); @@ -499,6 +503,93 @@ function queryGenericIndex(index, query, options) { return constitutedAndFiltered; } +function queryVerbatimIndex(index, query, options) { + const interestingFields = + unique(interestingFieldCombinations.flat()); + + const {verbatimTerms, queriedKind} = + processTerms(query); + + if (empty(verbatimTerms)) return null; + + const particles = + particulate(verbatimTerms); + + const groupedParticles = + groupArray(particles, ({length}) => length); + + const queriesBy = keys => + (groupedParticles.get(keys.length) ?? []) + .flatMap(permutations) + .map(values => values.map(({terms}) => terms.join(' '))) + .map(values => + stitchArrays({ + field: keys, + query: values, + })); + + const boilerplate = queryBoilerplate(index); + + const particleResults = + Object.fromEntries( + interestingFields.map(field => [ + field, + Object.fromEntries( + particles.flat() + .map(({terms}) => terms.join(' ')) + .map(query => [ + query, + new Set( + boilerplate + .query(query, { + ...options, + field, + limit: Infinity, + }) + .fieldResults[field]), + ])), + ])); + + const results = new Set(); + + for (const interestingFieldCombination of interestingFieldCombinations) { + for (const query of queriesBy(interestingFieldCombination)) { + const idToMatchingFieldsMap = new Map(); + for (const {field, query: fieldQuery} of query) { + for (const id of particleResults[field][fieldQuery]) { + if (idToMatchingFieldsMap.has(id)) { + idToMatchingFieldsMap.get(id).push(field); + } else { + idToMatchingFieldsMap.set(id, [field]); + } + } + } + + const commonAcrossFields = + Array.from(idToMatchingFieldsMap.entries()) + .filter(([id, matchingFields]) => + matchingFields.length === interestingFieldCombination.length) + .map(([id]) => id); + + for (const result of commonAcrossFields) { + results.add(result); + } + } + } + + const constituted = + boilerplate.constitute(results); + + const constitutedAndFiltered = + constituted + .filter(({id}) => + (queriedKind + ? id.split(':')[0] === queriedKind + : true)); + + return constitutedAndFiltered; +} + function processTerms(query) { const kindTermSpec = [ {kind: 'album', terms: ['album']}, @@ -510,11 +601,14 @@ function processTerms(query) { ]; const genericTerms = []; + const verbatimTerms = []; let queriedKind = null; const termRegexp = new RegExp( String.raw`(?<kind>${kindTermSpec.flatMap(spec => spec.terms).join('|')})` + + String.raw`|(?<=^|\s)(?<quote>["'])(?<regularVerbatim>.+?)\k<quote>(?=$|\s)` + + String.raw`|(?<=^|\s)[“”‘’](?<curlyVerbatim>.+?)[“”‘’](?=$|\s)` + String.raw`|[^\s\-]+`, 'gi'); @@ -530,10 +624,16 @@ function processTerms(query) { continue; } + const verbatim = groups.regularVerbatim || groups.curlyVerbatim; + if (verbatim) { + verbatimTerms.push(verbatim); + continue; + } + genericTerms.push(match[0]); } - return {genericTerms, queriedKind}; + return {genericTerms, verbatimTerms, queriedKind}; } function particulate(terms) { |