diff options
Diffstat (limited to 'src/common-util')
-rw-r--r-- | src/common-util/search-shape.js | 58 | ||||
-rw-r--r-- | src/common-util/search-spec.js | 259 | ||||
-rw-r--r-- | src/common-util/sort.js | 28 | ||||
-rw-r--r-- | src/common-util/sugar.js | 134 | ||||
-rw-r--r-- | src/common-util/wiki-data.js | 108 |
5 files changed, 284 insertions, 303 deletions
diff --git a/src/common-util/search-shape.js b/src/common-util/search-shape.js new file mode 100644 index 00000000..e0819ed6 --- /dev/null +++ b/src/common-util/search-shape.js @@ -0,0 +1,58 @@ +// Index structures shared by client and server, and relevant interfaces. +// First and foremost, this is complemented by src/search-select.js, which +// actually fills the search indexes up with stuff. During build this all +// gets consumed by src/search.js to make an index, fill it with stuff +// (as described by search-select.js), and export it to disk; then on +// the client that export is consumed by src/static/js/search-worker.js, +// which builds an index in the same shape and imports the data for query. + +const baselineStore = [ + 'primaryName', + 'disambiguator', + 'artwork', + 'color', +]; + +const genericStore = baselineStore; + +const searchShape = { + generic: { + index: [ + 'primaryName', + 'parentName', + 'artTags', + 'additionalNames', + 'contributors', + 'groups', + ].map(field => ({field, tokenize: 'forward'})), + + store: genericStore, + }, + + verbatim: { + index: [ + 'primaryName', + 'parentName', + 'artTags', + 'additionalNames', + 'contributors', + 'groups', + ], + + store: genericStore, + }, +}; + +export default searchShape; + +export function makeSearchIndex(descriptor, {FlexSearch}) { + return new FlexSearch.Document({ + id: 'reference', + index: descriptor.index, + store: descriptor.store, + + // Disable scoring, always return results according to provided order + // (specified above in `genericQuery`, etc). + resolution: 1, + }); +} diff --git a/src/common-util/search-spec.js b/src/common-util/search-spec.js deleted file mode 100644 index 75de0d16..00000000 --- a/src/common-util/search-spec.js +++ /dev/null @@ -1,259 +0,0 @@ -// Index structures shared by client and server, and relevant interfaces. - -function getArtworkPath(thing) { - switch (thing.constructor[Symbol.for('Thing.referenceType')]) { - case 'album': { - return [ - 'media.albumCover', - thing.directory, - thing.coverArtFileExtension, - ]; - } - - case 'flash': { - return [ - 'media.flashArt', - thing.directory, - thing.coverArtFileExtension, - ]; - } - - case 'track': { - if (thing.hasUniqueCoverArt) { - return [ - 'media.trackCover', - thing.album.directory, - thing.directory, - thing.coverArtFileExtension, - ]; - } else if (thing.album.hasCoverArt) { - return [ - 'media.albumCover', - thing.album.directory, - thing.album.coverArtFileExtension, - ]; - } else { - return null; - } - } - - default: - return null; - } -} - -function prepareArtwork(thing, { - checkIfImagePathHasCachedThumbnails, - getThumbnailEqualOrSmaller, - urls, -}) { - const hasWarnings = - thing.artTags?.some(artTag => artTag.isContentWarning); - - const artworkPath = - getArtworkPath(thing); - - if (!artworkPath) { - return undefined; - } - - const mediaSrc = - urls - .from('media.root') - .to(...artworkPath); - - if (!checkIfImagePathHasCachedThumbnails(mediaSrc)) { - return undefined; - } - - const selectedSize = - getThumbnailEqualOrSmaller( - (hasWarnings ? 'mini' : 'adorb'), - mediaSrc); - - const mediaSrcJpeg = - mediaSrc.replace(/\.(png|jpg)$/, `.${selectedSize}.jpg`); - - const displaySrc = - urls - .from('thumb.root') - .to('thumb.path', mediaSrcJpeg); - - const serializeSrc = - displaySrc.replace(thing.directory, '<>'); - - return serializeSrc; -} - -export const searchSpec = { - generic: { - query: ({ - albumData, - artTagData, - artistData, - flashData, - groupData, - trackData, - }) => [ - albumData, - - artTagData, - - artistData - .filter(artist => !artist.isAlias), - - flashData, - - groupData, - - trackData - // Exclude rereleases - there's no reasonable way to differentiate - // them from the main release as part of this query. - .filter(track => !track.mainReleaseTrack), - ].flat(), - - process(thing, opts) { - const fields = {}; - - fields.primaryName = - thing.name; - - const kind = - thing.constructor[Symbol.for('Thing.referenceType')]; - - fields.parentName = - (kind === 'track' - ? thing.album.name - : kind === 'group' - ? thing.category.name - : kind === 'flash' - ? thing.act.name - : null); - - fields.color = - thing.color; - - fields.artTags = - (thing.constructor.hasPropertyDescriptor('artTags') - ? thing.artTags.map(artTag => artTag.nameShort) - : []); - - fields.additionalNames = - (thing.constructor.hasPropertyDescriptor('additionalNames') - ? thing.additionalNames.map(entry => entry.name) - : thing.constructor.hasPropertyDescriptor('aliasNames') - ? thing.aliasNames - : []); - - const contribKeys = [ - 'artistContribs', - 'bannerArtistContribs', - 'contributorContribs', - 'coverArtistContribs', - 'wallpaperArtistContribs', - ]; - - const contributions = - contribKeys - .filter(key => Object.hasOwn(thing, key)) - .flatMap(key => thing[key]); - - fields.contributors = - contributions - .flatMap(({artist}) => [ - artist.name, - ...artist.aliasNames, - ]); - - const groups = - (Object.hasOwn(thing, 'groups') - ? thing.groups - : Object.hasOwn(thing, 'album') - ? thing.album.groups - : []); - - const mainContributorNames = - contributions - .map(({artist}) => artist.name); - - fields.groups = - groups - .filter(group => !mainContributorNames.includes(group.name)) - .map(group => group.name); - - fields.artwork = - prepareArtwork(thing, opts); - - return fields; - }, - - index: [ - 'primaryName', - 'parentName', - 'artTags', - 'additionalNames', - 'contributors', - 'groups', - ], - - store: [ - 'primaryName', - 'artwork', - 'color', - ], - }, -}; - -export function makeSearchIndex(descriptor, {FlexSearch}) { - return new FlexSearch.Document({ - id: 'reference', - index: descriptor.index, - store: descriptor.store, - }); -} - -// TODO: This function basically mirrors bind-utilities.js, which isn't -// exactly robust, but... binding might need some more thought across the -// codebase in *general.* -function bindSearchUtilities({ - checkIfImagePathHasCachedThumbnails, - getThumbnailEqualOrSmaller, - thumbsCache, - urls, -}) { - const bound = { - urls, - }; - - bound.checkIfImagePathHasCachedThumbnails = - (imagePath) => - checkIfImagePathHasCachedThumbnails(imagePath, thumbsCache); - - bound.getThumbnailEqualOrSmaller = - (preferred, imagePath) => - getThumbnailEqualOrSmaller(preferred, imagePath, thumbsCache); - - return bound; -} - -export function populateSearchIndex(index, descriptor, opts) { - const {wikiData} = opts; - const bound = bindSearchUtilities(opts); - - const collection = descriptor.query(wikiData); - - for (const thing of collection) { - const reference = thing.constructor.getReference(thing); - - let processed; - try { - processed = descriptor.process(thing, bound); - } catch (caughtError) { - throw new Error( - `Failed to process searchable thing ${reference}`, - {cause: caughtError}); - } - - index.add({reference, ...processed}); - } -} diff --git a/src/common-util/sort.js b/src/common-util/sort.js index fd382033..bbe4e551 100644 --- a/src/common-util/sort.js +++ b/src/common-util/sort.js @@ -370,11 +370,12 @@ export function sortAlbumsTracksChronologically(data, { getDate, } = {}) { // Sort albums before tracks... - sortByConditions(data, [(t) => t.album === undefined]); + sortByConditions(data, [t => t.isAlbum]); - // Group tracks by album... - sortByDirectory(data, { - getDirectory: (t) => (t.album ? t.album.directory : t.directory), + // Put albums alphabetically, and group with them... + sortAlphabetically(data, { + getDirectory: t => t.isTrack ? t.album.directory : t.directory, + getName: t => t.isTrack ? t.album.name : t.name, }); // Sort tracks by position in album... @@ -389,6 +390,22 @@ export function sortAlbumsTracksChronologically(data, { return data; } +export function sortArtworksChronologically(data, { + latestFirst = false, +} = {}) { + // Artworks conveniently describe their things as artwork.thing, so they + // work in sortEntryThingPairs. (Yes, this is just assuming the artworks + // are only for albums and tracks... sorry... TODO...) + sortEntryThingPairs(data, things => + sortAlbumsTracksChronologically(things, {latestFirst})); + + // Artworks' own dates always matter before however the thing places itself, + // and accommodate per-thing properties like coverArtDate anyway. + sortByDate(data, {latestFirst}); + + return data; +} + export function sortFlashesChronologically(data, { latestFirst = false, getDate, @@ -413,6 +430,7 @@ export function sortFlashesChronologically(data, { export function sortContributionsChronologically(data, sortThings, { latestFirst = false, + getThing = contrib => contrib.thing, } = {}) { // Contributions only have one date property (which is provided when // the contribution is created). They're sorted by this most primarily, @@ -421,7 +439,7 @@ export function sortContributionsChronologically(data, sortThings, { const entries = data.map(contrib => ({ entry: contrib, - thing: contrib.thing, + thing: getThing(contrib), })); sortEntryThingPairs( diff --git a/src/common-util/sugar.js b/src/common-util/sugar.js index 66e160aa..354cf5cc 100644 --- a/src/common-util/sugar.js +++ b/src/common-util/sugar.js @@ -70,6 +70,16 @@ export function pick(array) { return array[Math.floor(Math.random() * array.length)]; } +// Gets the only item in a single-item array (strictly, length === 1). +// If the array has more than one item, or is empty, this is null. +export function onlyItem(array) { + if (array.length === 1) { + return array[0]; + } else { + return null; + } +} + // Gets the item at an index relative to another index. export function atOffset(array, index, offset, { wrap = false, @@ -116,10 +126,14 @@ export function findIndexOrEnd(array, fn) { // returns null (or values in the array are nullish), they'll just be skipped in // the sum. export function accumulateSum(array, fn = x => x) { + if (!Array.isArray(array)) { + return accumulateSum(Array.from(array, fn)); + } + return array.reduce( (accumulator, value, index, array) => accumulator + - fn(value, index, array) ?? 0, + (fn(value, index, array) ?? 0), 0); } @@ -221,6 +235,9 @@ export const compareArrays = (arr1, arr2, {checkOrder = true} = {}) => ? arr1.every((x, i) => arr2[i] === x) : arr1.every((x) => arr2.includes(x))); +export const exhaust = (generatorFunction) => + Array.from(generatorFunction()); + export function compareObjects(obj1, obj2, { checkOrder = false, checkSymbols = true, @@ -251,11 +268,20 @@ export function compareObjects(obj1, obj2, { // Stolen from jq! Which pro8a8ly stole the concept from other places. Nice. export const withEntries = (obj, fn) => { - const result = fn(Object.entries(obj)); - if (result instanceof Promise) { - return result.then(entries => Object.fromEntries(entries)); + if (obj instanceof Map) { + const result = fn(Array.from(obj.entries())); + if (result instanceof Promise) { + return result.then(entries => new Map(entries)); + } else { + return new Map(result); + } } else { - return Object.fromEntries(result); + const result = fn(Object.entries(obj)); + if (result instanceof Promise) { + return result.then(entries => Object.fromEntries(entries)); + } else { + return Object.fromEntries(result); + } } } @@ -299,34 +325,74 @@ export function filterProperties(object, properties, { return filteredObject; } -export function queue(array, max = 50) { - if (max === 0) { - return array.map((fn) => fn()); +export function queue(functionList, queueSize = 50) { + if (queueSize === 0) { + return functionList.map(fn => fn()); } - const begin = []; - let current = 0; - const ret = array.map( - (fn) => - new Promise((resolve, reject) => { - begin.push(() => { - current++; - Promise.resolve(fn()).then((value) => { - current--; - if (current < max && begin.length) { - begin.shift()(); - } - resolve(value); - }, reject); - }); - }) - ); + const promiseList = []; + const resolveList = []; + const rejectList = []; - for (let i = 0; i < max && begin.length; i++) { - begin.shift()(); + for (let i = 0; i < functionList.length; i++) { + const promiseWithResolvers = Promise.withResolvers(); + promiseList.push(promiseWithResolvers.promise); + resolveList.push(promiseWithResolvers.resolve); + rejectList.push(promiseWithResolvers.reject); } - return ret; + let cursor = 0; + let running = 0; + + const next = async () => { + if (running >= queueSize) { + return; + } + + if (cursor === functionList.length) { + return; + } + + const thisFunction = functionList[cursor]; + const thisResolve = resolveList[cursor]; + const thisReject = rejectList[cursor]; + + delete functionList[cursor]; + delete resolveList[cursor]; + delete rejectList[cursor]; + + cursor++; + running++; + + try { + thisResolve(await thisFunction()); + } catch (error) { + thisReject(error); + } finally { + running--; + + // If the cursor is at 1, this is the first promise that resolved, + // so we're now done the "kick start", and can start the remaining + // promises (up to queueSize). + if (cursor === 1) { + // Since only one promise is used for the "kick start", and that one + // has just resolved, we know there's none running at all right now, + // and can start as many as specified in the queueSize right away. + for (let i = 0; i < queueSize; i++) { + next(); + } + } else { + next(); + } + } + }; + + // Only start a single promise, as a "kick start", so that it resolves as + // early as possible (it will resolve before we use CPU to start the rest + // of the promises, up to queueSize). + next(); + + return promiseList; } export function delay(ms) { @@ -357,15 +423,23 @@ export function splitKeys(key) { // Follows a key path like 'foo.bar.baz' to get an item nested deeply inside // an object. If a value partway through the chain is an array, the values -// down the rest of the chain are gotten for each item in the array. +// down the rest of the chain are gotten for each item in the array. If a value +// partway through the chain is missing the next key, the chain stops and is +// undefined (or null) at that point. // // obj: {x: [{y: ['a']}, {y: ['b', 'c']}]} // key: 'x.y' // -> [['a'], ['b', 'c']] // +// obj: {x: [{y: ['a']}, {y: ['b', 'c']}, {z: ['d', 'e']}]} +// key: 'x.z' +// -> [undefined, undefined, ['d', 'e']] +// export function getNestedProp(obj, key) { const recursive = (o, k) => - (k.length === 1 + (o === undefined || o === null + ? o + : k.length === 1 ? o[k[0]] : Array.isArray(o[k[0]]) ? o[k[0]].map(v => recursive(v, k.slice(1))) diff --git a/src/common-util/wiki-data.js b/src/common-util/wiki-data.js index 4bbef8ab..3fde2495 100644 --- a/src/common-util/wiki-data.js +++ b/src/common-util/wiki-data.js @@ -11,7 +11,7 @@ export {filterMultipleArrays} from './sugar.js'; // Generic value operations -export function getKebabCase(name) { +export function getCaseSensitiveKebabCase(name) { return name // Spaces to dashes @@ -34,6 +34,9 @@ export function getKebabCase(name) { // General punctuation which always separates surrounding words .replace(/[/@#$%*()_=,[\]{}|\\;:<>?`~]/g, '-') + // More punctuation which always separates surrounding words + .replace(/[\u{2013}-\u{2014}]/u, '-') // En Dash, Em Dash + // Accented characters .replace(/[áâäàå]/gi, 'a') .replace(/[çč]/gi, 'c') @@ -50,17 +53,17 @@ export function getKebabCase(name) { // Trim dashes on boundaries .replace(/^-+|-+$/g, '') +} - // Always lowercase - .toLowerCase(); +export function getKebabCase(name) { + return getCaseSensitiveKebabCase(name).toLowerCase(); } // Specific data utilities -// Matches heading details from commentary data in roughly the formats: +// Matches heading details from commentary data in roughly the format: // -// <i>artistReference:</i> (annotation, date) -// <i>artistReference|artistDisplayText:</i> (annotation, date) +// <i>artistText:</i> (annotation, date) // // where capturing group "annotation" can be any text at all, except that the // last entry (past a comma or the only content within parentheses), if parsed @@ -83,8 +86,9 @@ export function getKebabCase(name) { // parentheses can be part of the actual annotation content. // // Capturing group "artistReference" is all the characters between <i> and </i> -// (apart from the pipe and "artistDisplayText" text, if present), and is either -// the name of an artist or an "artist:directory"-style reference. +// (apart from the pipe and the "artistText" group, if present), and is either +// the name of one or more artist or "artist:directory"-style references, +// joined by commas, if multiple. // // This regular expression *doesn't* match bodies, which will need to be parsed // out of the original string based on the indices matched using this. @@ -94,7 +98,7 @@ const dateRegex = groupName => String.raw`(?<${groupName}>[a-zA-Z]+ [0-9]{1,2}, [0-9]{4,4}|[0-9]{1,2} [^,]*[0-9]{4,4}|[0-9]{1,4}[-/][0-9]{1,4}[-/][0-9]{1,4})`; const commentaryRegexRaw = - String.raw`^<i>(?<artistReferences>.+?)(?:\|(?<artistDisplayText>.+))?:<\/i>(?: \((?<annotation>(?:.*?(?=,|\)[^)]*$))*?)(?:,? ?(?:(?<dateKind>sometime|throughout|around) )?${dateRegex('date')}(?: ?- ?${dateRegex('secondDate')})?(?: (?<accessKind>captured|accessed) ${dateRegex('accessDate')})?)?\))?`; + String.raw`^<i>(?<artistText>.+?):<\/i>(?: \((?<annotation>(?:.*?(?=,|\)[^)]*$))*?)(?:,? ?(?:(?<dateKind>sometime|throughout|around) )?${dateRegex('date')}(?: ?- ?${dateRegex('secondDate')})?(?: (?<accessKind>captured|accessed) ${dateRegex('accessDate')})?)?\))?`; export const commentaryRegexCaseInsensitive = new RegExp(commentaryRegexRaw, 'gmi'); export const commentaryRegexCaseSensitive = @@ -102,6 +106,43 @@ export const commentaryRegexCaseSensitive = export const commentaryRegexCaseSensitiveOneShot = new RegExp(commentaryRegexRaw); +// The #validators function isOldStyleLyrics() describes +// what this regular expression detects against. +export const multipleLyricsDetectionRegex = + /^<i>.*:<\/i>/m; + +export function matchContentEntries(sourceText) { + const matchEntries = []; + + let previousMatchEntry = null; + let previousEndIndex = null; + + const trimBody = body => + body + .replace(/^\n*/, '') + .replace(/\n*$/, ''); + + for (const {0: matchText, index: startIndex, groups: matchEntry} + of sourceText.matchAll(commentaryRegexCaseSensitive)) { + if (previousMatchEntry) { + previousMatchEntry.body = + trimBody(sourceText.slice(previousEndIndex, startIndex)); + } + + matchEntries.push(matchEntry); + + previousMatchEntry = matchEntry; + previousEndIndex = startIndex + matchText.length; + } + + if (previousMatchEntry) { + previousMatchEntry.body = + trimBody(sourceText.slice(previousEndIndex)); + } + + return matchEntries; +} + export function filterAlbumsByCommentary(albums) { return albums .filter((album) => [album, ...album.tracks].some((x) => x.commentary)); @@ -492,3 +533,52 @@ export function combineWikiDataArrays(arrays) { return combined; } } + +// Markdown stuff + +export function* matchMarkdownLinks(markdownSource, {marked}) { + const plausibleLinkRegexp = /\[(?=.*?\))/g; + + // Pedantic rules use more particular parentheses detection in link + // destinations - they allow one level of balanced parentheses, and + // otherwise, parentheses must be escaped. This allows for entire links + // to be wrapped in parentheses, e.g below: + // + // This is so cool. ([You know??](https://example.com)) + // + const definiteLinkRegexp = marked.Lexer.rules.inline.pedantic.link; + + let plausibleMatch = null; + while (plausibleMatch = plausibleLinkRegexp.exec(markdownSource)) { + const definiteMatch = + definiteLinkRegexp.exec(markdownSource.slice(plausibleMatch.index)); + + if (!definiteMatch) { + continue; + } + + const [{length}, label, href] = definiteMatch; + const index = plausibleMatch.index + definiteMatch.index; + + yield {label, href, index, length}; + } +} + +export function* matchInlineLinks(source) { + const plausibleLinkRegexp = /\b[a-z]*:\/\/[^ ]*?(?=(?:[,.!?]*)(?:\s|$))/gm; + + let plausibleMatch = null; + while (plausibleMatch = plausibleLinkRegexp.exec(source)) { + const [href] = plausibleMatch; + const {index} = plausibleMatch; + const [{length}] = plausibleMatch; + + try { + new URL(href); + } catch { + continue; + } + + yield {href, length, index}; + } +} |