From 5457b88f8d3d234af0b08d15f3c6249f6649aac3 Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Fri, 16 Feb 2024 10:16:36 -0400 Subject: data: move sorting & generic functions out of #wiki-data --- src/util/wiki-data.js | 619 +------------------------------------------------- 1 file changed, 2 insertions(+), 617 deletions(-) (limited to 'src/util/wiki-data.js') diff --git a/src/util/wiki-data.js b/src/util/wiki-data.js index cdbffff..89c04a2 100644 --- a/src/util/wiki-data.js +++ b/src/util/wiki-data.js @@ -1,6 +1,7 @@ // Utility functions for interacting with wiki data. -import {accumulateSum, empty, unique} from './sugar.js'; +import {accumulateSum, empty} from './sugar.js'; +import {sortByDate} from './sort.js'; // Generic value operations @@ -15,622 +16,6 @@ export function getKebabCase(name) { .toLowerCase(); } -export function chunkByConditions(array, conditions) { - if (empty(array)) { - return []; - } - - if (empty(conditions)) { - return [array]; - } - - const out = []; - let cur = [array[0]]; - for (let i = 1; i < array.length; i++) { - const item = array[i]; - const prev = array[i - 1]; - let chunk = false; - for (const condition of conditions) { - if (condition(item, prev)) { - chunk = true; - break; - } - } - if (chunk) { - out.push(cur); - cur = [item]; - } else { - cur.push(item); - } - } - out.push(cur); - return out; -} - -export function chunkByProperties(array, properties) { - return chunkByConditions( - array, - properties.map((p) => (a, b) => { - if (a[p] instanceof Date && b[p] instanceof Date) return +a[p] !== +b[p]; - - if (a[p] !== b[p]) return true; - - // Not sure if this line is still necessary with the specific check for - // d8tes a8ove, 8ut, uh, keeping it anyway, just in case....? - if (a[p] != b[p]) return true; - - return false; - }) - ).map((chunk) => ({ - ...Object.fromEntries(properties.map((p) => [p, chunk[0][p]])), - chunk, - })); -} - -export function chunkMultipleArrays(...args) { - const arrays = args.slice(0, -1); - const fn = args.at(-1); - - if (arrays[0].length === 0) { - return []; - } - - const newChunk = index => arrays.map(array => [array[index]]); - const results = [newChunk(0)]; - - for (let i = 1; i < arrays[0].length; i++) { - const current = results.at(-1); - - const args = []; - for (let j = 0; j < arrays.length; j++) { - const item = arrays[j][i]; - const previous = current[j].at(-1); - args.push(item, previous); - } - - if (fn(...args)) { - results.push(newChunk(i)); - continue; - } - - for (let j = 0; j < arrays.length; j++) { - current[j].push(arrays[j][i]); - } - } - - return results; -} - -// Sorting functions - all utils here are mutating, so make sure to initially -// slice/filter/somehow generate a new array from input data if retaining the -// initial sort matters! (Spoilers: If what you're doing involves any kind of -// parallelization, it definitely matters.) - -// General sorting utilities! These don't do any sorting on their own but are -// handy in the sorting functions below (or if you're making your own sort). - -export function compareCaseLessSensitive(a, b) { - // Compare two strings without considering capitalization... unless they - // happen to be the same that way. - - const al = a.toLowerCase(); - const bl = b.toLowerCase(); - - return al === bl - ? a.localeCompare(b, undefined, {numeric: true}) - : al.localeCompare(bl, undefined, {numeric: true}); -} - -// Subtract common prefixes and other characters which some people don't like -// to have considered while sorting. The words part of this is English-only for -// now, which is totally evil. -export function normalizeName(s) { - // Turn (some) ligatures into expanded variant for cleaner sorting, e.g. - // "ff" into "ff", in decompose mode, so that "ü" is represented as two - // bytes ("u" + \u0308 combining diaeresis). - s = s.normalize('NFKD'); - - // Replace one or more whitespace of any kind in a row, as well as certain - // punctuation, with a single typical space, then trim the ends. - s = s - .replace( - /[\p{Separator}\p{Dash_Punctuation}\p{Connector_Punctuation}]+/gu, - ' ' - ) - .trim(); - - // Discard anything that isn't a letter, number, or space. - s = s.replace(/[^\p{Letter}\p{Number} ]/gu, '').trim(); - - // Remove common English (only, for now) prefixes. - s = s.replace(/^(?:an?|the) /i, ''); - - return s; -} - -// Sorts multiple arrays by an arbitrary function (which is the last argument). -// Paired values from each array are provided to the callback sequentially: -// -// (a_fromFirstArray, b_fromFirstArray, -// a_fromSecondArray, b_fromSecondArray, -// a_fromThirdArray, b_fromThirdArray) => -// relative positioning (negative, positive, or zero) -// -// Like native single-array sort, this is a mutating function. -export function sortMultipleArrays(...args) { - const arrays = args.slice(0, -1); - const fn = args.at(-1); - - const length = arrays[0].length; - const symbols = new Array(length).fill(null).map(() => Symbol()); - const indexes = Object.fromEntries(symbols.map((symbol, index) => [symbol, index])); - - symbols.sort((a, b) => { - const indexA = indexes[a]; - const indexB = indexes[b]; - - const args = []; - for (let i = 0; i < arrays.length; i++) { - args.push(arrays[i][indexA]); - args.push(arrays[i][indexB]); - } - - return fn(...args); - }); - - for (const array of arrays) { - // Note: We're mutating this array pulling values from itself, but only all - // at once after all those values have been pulled. - array.splice(0, array.length, ...symbols.map(symbol => array[indexes[symbol]])); - } - - return arrays; -} - -// Filters multiple arrays by an arbitrary function (which is the last argument). -// Values from each array are provided to the callback sequentially: -// -// (value_fromFirstArray, -// value_fromSecondArray, -// value_fromThirdArray, -// index, -// [firstArray, secondArray, thirdArray]) => -// true or false -// -// Please be aware that this is a mutating function, unlike native single-array -// filter. The mutated arrays are returned. Also attached under `.removed` are -// corresponding arrays of items filtered out. -export function filterMultipleArrays(...args) { - const arrays = args.slice(0, -1); - const fn = args.at(-1); - - const removed = new Array(arrays.length).fill(null).map(() => []); - - for (let i = arrays[0].length - 1; i >= 0; i--) { - const args = arrays.map(array => array[i]); - args.push(i, arrays); - - if (!fn(...args)) { - for (let j = 0; j < arrays.length; j++) { - const item = arrays[j][i]; - arrays[j].splice(i, 1); - removed[j].unshift(item); - } - } - } - - Object.assign(arrays, {removed}); - return arrays; -} - -// Reduces multiple arrays with an arbitrary function (which is the last -// argument). Note that this reduces into multiple accumulators, one for -// each input array, not just a single value. That's reflected in both the -// callback parameters: -// -// (accumulator1, -// accumulator2, -// value_fromFirstArray, -// value_fromSecondArray, -// index, -// [firstArray, secondArray]) => -// [newAccumulator1, newAccumulator2] -// -// As well as the final return value of reduceMultipleArrays: -// -// [finalAccumulator1, finalAccumulator2] -// -// This is not a mutating function. -export function reduceMultipleArrays(...args) { - const [arrays, fn, initialAccumulators] = - (typeof args.at(-1) === 'function' - ? [args.slice(0, -1), args.at(-1), null] - : [args.slice(0, -2), args.at(-2), args.at(-1)]); - - if (empty(arrays[0])) { - throw new TypeError(`Reduce of empty arrays with no initial value`); - } - - let [accumulators, i] = - (initialAccumulators - ? [initialAccumulators, 0] - : [arrays.map(array => array[0]), 1]); - - for (; i < arrays[0].length; i++) { - const args = [...accumulators, ...arrays.map(array => array[i])]; - args.push(i, arrays); - accumulators = fn(...args); - } - - return accumulators; -} - -// Component sort functions - these sort by one particular property, applying -// unique particulars where appropriate. Usually you don't want to use these -// directly, but if you're making a custom sort they can come in handy. - -// Universal method for sorting things into a predictable order, as directory -// is taken to be unique. There are two exceptions where this function (and -// thus any of the composite functions that start with it) *can't* be taken as -// deterministic: -// -// 1) Mixed data of two different Things, as directories are only taken as -// unique within one given class of Things. For example, this function -// won't be deterministic if its array contains both and -// . -// -// 2) Duplicate directories, or multiple instances of the "same" Thing. -// This function doesn't differentiate between two objects of the same -// directory, regardless of any other properties or the overall "identity" -// of the object. -// -// These exceptions are unavoidable except for not providing that kind of data -// in the first place, but you can still ensure the overall program output is -// deterministic by ensuring the input is arbitrarily sorted according to some -// other criteria - ex, although sortByDirectory itself isn't determinstic when -// given mixed track and album data, the final output (what goes on the site) -// will always be the same if you're doing sortByDirectory([...albumData, -// ...trackData]), because the initial sort places albums before tracks - and -// sortByDirectory will handle the rest, given all directories are unique -// except when album and track directories overlap with each other. -export function sortByDirectory(data, { - getDirectory = object => object.directory, -} = {}) { - const directories = data.map(getDirectory); - - sortMultipleArrays(data, directories, - (a, b, directoryA, directoryB) => - compareCaseLessSensitive(directoryA, directoryB)); - - return data; -} - -export function sortByName(data, { - getName = object => object.name, -} = {}) { - const names = data.map(getName); - const normalizedNames = names.map(normalizeName); - - sortMultipleArrays(data, normalizedNames, names, - ( - a, b, - normalizedA, normalizedB, - nonNormalizedA, nonNormalizedB, - ) => - compareNormalizedNames( - normalizedA, normalizedB, - nonNormalizedA, nonNormalizedB, - )); - - return data; -} - -export function compareNormalizedNames( - normalizedA, normalizedB, - nonNormalizedA, nonNormalizedB, -) { - const comparison = compareCaseLessSensitive(normalizedA, normalizedB); - return ( - (comparison === 0 - ? compareCaseLessSensitive(nonNormalizedA, nonNormalizedB) - : comparison)); -} - -export function sortByDate(data, { - getDate = object => object.date, - latestFirst = false, -} = {}) { - const dates = data.map(getDate); - - sortMultipleArrays(data, dates, - (a, b, dateA, dateB) => - compareDates(dateA, dateB, {latestFirst})); - - return data; -} - -export function compareDates(a, b, { - latestFirst = false, -} = {}) { - if (a && b) { - return (latestFirst ? b - a : a - b); - } - - // It's possible for objects with and without dates to be mixed - // together in the same array. If that's the case, we put all items - // without dates at the end. - if (a) return -1; - if (b) return 1; - - // If neither of the items being compared have a date, don't move - // them relative to each other. This is basically the same as - // filtering out all non-date items and then pushing them at the - // end after sorting the rest. - return 0; -} - -export function getLatestDate(dates) { - const filtered = dates.filter(Boolean); - if (empty(filtered)) return null; - - return filtered - .reduce( - (accumulator, date) => - date > accumulator ? date : accumulator, - -Infinity); -} - -export function getEarliestDate(dates) { - const filtered = dates.filter(Boolean); - if (empty(filtered)) return null; - - return filtered - .reduce( - (accumulator, date) => - date < accumulator ? date : accumulator, - Infinity); -} - -// Funky sort which takes a data set and a corresponding list of "counts", -// which are really arbitrary numbers representing some property of each data -// object defined by the caller. It sorts and mutates *both* of these, so the -// sorted data will still correspond to the same indexed count. -export function sortByCount(data, counts, { - greatestFirst = false, -} = {}) { - sortMultipleArrays(data, counts, (data1, data2, count1, count2) => - (greatestFirst - ? count2 - count1 - : count1 - count2)); - - return data; -} - -// Corresponding filter function for the above sort. By default, items whose -// corresponding count is zero will be removed. -export function filterByCount(data, counts, { - min = 1, - max = Infinity, -} = {}) { - filterMultipleArrays(data, counts, (data, count) => - count >= min && count <= max); -} - -export function sortByPositionInParent(data, { - getParent, - getChildren, -}) { - return data.sort((a, b) => { - const parentA = getParent(a); - const parentB = getParent(b); - - // Don't change the sort when the two items are from separate parents. - // This function doesn't change the order of parents or try to "merge" - // two separated chunks of items from the same parent together. - if (parentA !== parentB) { - return 0; - } - - // Don't change the sort when either (or both) of the items doesn't - // even have a parent (e.g. it's the passed data is a mixed array of - // children and parents). - if (!parentA || !parentB) { - return 0; - } - - const indexA = getChildren(parentA).indexOf(a); - const indexB = getChildren(parentB).indexOf(b); - - // If the getParent/getChildren relationship doesn't go both ways for - // some reason, don't change the sort. - if (indexA === -1 || indexB === -1) { - return 0; - } - - return indexA - indexB; - }); -} - -export function sortByPositionInAlbum(data) { - return sortByPositionInParent(data, { - getParent: track => track.album, - getChildren: album => album.tracks, - }); -} - -export function sortByPositionInFlashAct(data) { - return sortByPositionInParent(data, { - getParent: flash => flash.act, - getChildren: act => act.flashes, - }); -} - -// Sorts data so that items are grouped together according to whichever of a -// set of arbitrary given conditions is true first. If no conditions are met -// for a given item, it's moved over to the end! -export function sortByConditions(data, conditions) { - return data.sort((a, b) => { - const ai = conditions.findIndex((f) => f(a)); - const bi = conditions.findIndex((f) => f(b)); - - if (ai >= 0 && bi >= 0) { - return ai - bi; - } else if (ai >= 0) { - return -1; - } else if (bi >= 0) { - return 1; - } else { - return 0; - } - }); -} - -// Composite sorting functions - these consider multiple properties, generally -// always returning the same output regardless of how the input was originally -// sorted (or left unsorted). If you're working with arbitrarily sorted inputs -// (typically wiki data, either in full or unsorted filter), these make sure -// what gets put on the actual website (or wherever) is deterministic. Also -// they're just handy sorting utilities. -// -// Note that because these are each comprised of multiple component sorting -// functions, they expect more than just one property to be present for full -// sorting (listed above each function). If you're mapping thing objects to -// another representation, try to include all of these listed properties. - -// Expects thing properties: -// * directory (or override getDirectory) -// * name (or override getName) -export function sortAlphabetically(data, { - getDirectory, - getName, -} = {}) { - sortByDirectory(data, {getDirectory}); - sortByName(data, {getName}); - return data; -} - -// Expects thing properties: -// * directory (or override getDirectory) -// * name (or override getName) -// * date (or override getDate) -export function sortChronologically(data, { - latestFirst = false, - getDirectory, - getName, - getDate, -} = {}) { - sortAlphabetically(data, {getDirectory, getName}); - sortByDate(data, {latestFirst, getDate}); - return data; -} - -// This one's a little odd! Sorts an array of {entry, thing} pairs using -// the provided sortFunction, which will operate on each item's `thing`, not -// its entry (or the item as a whole). If multiple entries are associated -// with the same thing, they'll end up bunched together in the output, -// retaining their original relative positioning. -export function sortEntryThingPairs(data, sortFunction) { - const things = unique(data.map(item => item.thing)); - sortFunction(things); - - const outputArrays = []; - const thingToOutputArray = new Map(); - - for (const thing of things) { - const array = []; - thingToOutputArray.set(thing, array); - outputArrays.push(array); - } - - for (const item of data) { - thingToOutputArray.get(item.thing).push(item); - } - - data.splice(0, data.length, ...outputArrays.flat()); - - return data; -} - -/* -// Alternate draft version of sortEntryThingPairs. -// See: https://github.com/hsmusic/hsmusic-wiki/issues/90#issuecomment-1607412168 - -// Maps the provided "preparation" function across a list of arbitrary values, -// building up a list of sortable values; sorts these with the provided sorting -// function; and reorders the sources to match their corresponding prepared -// values. As usual, if multiple source items correspond to the same sorting -// data, this retains the source relative positioning. -export function prepareAndSort(sources, prepareForSort, sortFunction) { - const prepared = []; - const preparedToSource = new Map(); - - for (const original of originals) { - const prep = prepareForSort(source); - prepared.push(prep); - preparedToSource.set(prep, source); - } - - sortFunction(prepared); - - sources.splice(0, ...sources.length, prepared.map(prep => preparedToSource.get(prep))); - - return sources; -} -*/ - -// Highly contextual sort functions - these are only for very specific types -// of Things, and have appropriately hard-coded behavior. - -// Sorts so that tracks from the same album are generally grouped together in -// their original (album track list) order, while prioritizing date (by default -// release date but can be overridden) above all else. -// -// This function also works for data lists which contain only tracks. -export function sortAlbumsTracksChronologically(data, { - latestFirst = false, - getDate, -} = {}) { - // Sort albums before tracks... - sortByConditions(data, [(t) => t.album === undefined]); - - // Group tracks by album... - sortByDirectory(data, { - getDirectory: (t) => (t.album ? t.album.directory : t.directory), - }); - - // Sort tracks by position in album... - sortByPositionInAlbum(data); - - // ...and finally sort by date. If tracks from more than one album were - // released on the same date, they'll still be grouped together by album, - // and tracks within an album will retain their relative positioning (i.e. - // stay in the same order as part of the album's track listing). - sortByDate(data, {latestFirst, getDate}); - - return data; -} - -export function sortFlashesChronologically(data, { - latestFirst = false, - getDate, -} = {}) { - // Group flashes by act... - sortByDirectory(data, { - getDirectory: flash => flash.act.directory, - }); - - // Sort flashes by position in act... - sortByPositionInFlashAct(data); - - // ...and finally sort by date. If flashes from more than one act were - // released on the same date, they'll still be grouped together by act, - // and flashes within an act will retain their relative positioning (i.e. - // stay in the same order as the act's flash listing). - sortByDate(data, {latestFirst, getDate}); - - return data; -} - // Specific data utilities // Matches heading details from commentary data in roughly the formats: -- cgit 1.3.0-6-gf8a5