diff options
Diffstat (limited to 'src/util/wiki-data.js')
-rw-r--r-- | src/util/wiki-data.js | 242 |
1 files changed, 213 insertions, 29 deletions
// Sorting functions - all utils here are mutating, so make sure to initially
// slice/filter/somehow generate a new array from input data if retaining the
// initial sort matters! (Spoilers: If what you're doing involves any kind of
// parallelization, it definitely matters.)

// General sorting utilities! These don't do any sorting on their own but are
// handy in the sorting functions below (or if you're making your own sort).

// Compare two strings without considering capitalization... unless they
// happen to be the same that way, in which case the original (cased) strings
// are compared instead, as a tiebreaker. Both comparisons use numeric
// collation, so runs of digits are compared by value ("2" before "10").
//
// Returns a negative number, zero, or a positive number, i.e. it can be
// handed straight to Array.prototype.sort as a comparator.
export function compareCaseLessSensitive(a, b) {
    const collation = {numeric: true};

    const lowerA = a.toLowerCase();
    const lowerB = b.toLowerCase();

    if (lowerA === lowerB) {
        return a.localeCompare(b, undefined, collation);
    }

    return lowerA.localeCompare(lowerB, undefined, collation);
}

// Subtract common prefixes and other characters which some people don't like
// to have considered while sorting. The words part of this is English-only for
// now, which is totally evil.
//
// Takes a string and returns a new, normalized string; the result contains
// only letters, numbers and single spaces, with no space at either end.
export function normalizeName(s) {
    return s
        // Compatibility-decompose, which turns (some) ligatures into their
        // expanded variant for cleaner sorting, e.g. "ﬀ" (U+FB00) into "ff",
        // and splits accented letters apart, so that "ü" is represented as
        // two code points ("u" + \u0308 combining diaeresis).
        .normalize('NFKD')

        // Replace one or more whitespace of any kind in a row, as well as
        // certain punctuation, with a single typical space. (\s is listed
        // explicitly because tabs and line breaks are control characters,
        // not Unicode separators.)
        .replace(/[\s\p{Separator}\p{Dash_Punctuation}\p{Connector_Punctuation}]+/gu, ' ')

        // Discard anything that isn't a letter, number, or space. This also
        // drops the combining marks split off by the decomposition above.
        .replace(/[^\p{Letter}\p{Number} ]/gu, '')

        // Discarding a free-standing symbol ("Rock & Roll") leaves the spaces
        // on both sides of it behind, so collapse runs of spaces again and
        // only then trim the ends.
        .replace(/ {2,}/g, ' ')
        .trim()

        // Remove common English (only, for now) prefixes.
        .replace(/^(?:an?|the) /i, '');
}

// Component sort functions - these sort by one particular property, applying
// unique particulars where appropriate. Usually you don't want to use these
// directly, but if you're making a custom sort they can come in handy.

// Universal method for sorting things into a predictable order, as directory
// is taken to be unique. There are two exceptions where this function (and
// thus any of the composite functions that start with it) *can't* be taken as
// deterministic:
//
// 1) Mixed data of two different Things, as directories are only taken as
//    unique within one given class of Things. For example, this function
//    won't be deterministic if its array contains both <album:ithaca> and
//    <track:ithaca>.
//
// 2) Duplicate directories, or multiple instances of the "same" Thing.
//    This function doesn't differentiate between two objects of the same
//    directory, regardless of any other properties or the overall "identity"
//    of the object.
//
// These exceptions are unavoidable except for not providing that kind of data
// in the first place, but you can still ensure the overall program output is
// deterministic by ensuring the input is arbitrarily sorted according to some
// other criteria - ex, although sortByDirectory itself isn't deterministic when
// given mixed track and album data, the final output (what goes on the site)
// will always be the same if you're doing sortByDirectory([...albumData,
// ...trackData]), because the initial sort places albums before tracks - and
// sortByDirectory will handle the rest, given all directories are unique
// except when album and track directories overlap with each other.
//
// Mutates and returns data. getDirectory maps an item to the string it is
// sorted by (defaults to the item's own directory property).
export function sortByDirectory(data, {
    getDirectory = o => o.directory
} = {}) {
    return data.sort((a, b) =>
        compareCaseLessSensitive(getDirectory(a), getDirectory(b)));
}

// Mutates and returns data, sorted by name. Names are compared in their
// normalized form first (see normalizeName); when two normalized names are
// the same, the raw names are compared as a tiebreaker. getName maps an item
// to its name (defaults to the item's own name property).
export function sortByName(data, {
    getName = o => o.name
} = {}) {
    return data.sort((a, b) => {
        const an = getName(a);
        const bn = getName(b);

        return (
            compareCaseLessSensitive(normalizeName(an), normalizeName(bn)) ||
            compareCaseLessSensitive(an, bn));
    });
}

// Mutates and returns data, sorted from earliest to latest date. getDate maps
// an item to its date (defaults to the item's own date property); dates are
// subtracted from each other, so they need to be Date objects or something
// else which converts to a number.
export function sortByDate(data, {
    getDate = o => o.date
} = {}) {
    return data.sort((a, b) => {
        const ad = getDate(a);
        const bd = getDate(b);

        // It's possible for objects with and without dates to be mixed
        // together in the same array. If that's the case, we put all items
        // without dates at the end.
        if (ad && bd) {
            return ad - bd;
        } else if (ad) {
            return -1;
        } else if (bd) {
            return 1;
        } else {
            // If neither of the items being compared have a date, don't move
            // them relative to each other.
            // NOTE(review): the end of this branch wasn't visible in the
            // change under review; `return 0` is assumed from the comment
            // above - confirm against the full file.
            return 0;
        }
    });
}

// Mutates and returns data, putting tracks which belong to the same album
// into the order of that album's track list. Items are only ever compared
// against other tracks of the very same album; every other pair is reported
// as equal, so this is meant for data which is already grouped by album.
export function sortByPositionInAlbum(data) {
    // album -> (track -> first index in album.tracks). Built the first time
    // an album is seen, so each track list is read and scanned once per sort
    // instead of once per comparison.
    const positionsByAlbum = new Map();

    const getPosition = (album, track) => {
        let positions = positionsByAlbum.get(album);

        if (!positions) {
            positions = new Map();
            album.tracks.forEach((listed, index) => {
                // Keep the first occurrence, same as indexOf would report.
                if (!positions.has(listed)) {
                    positions.set(listed, index);
                }
            });
            positionsByAlbum.set(album, positions);
        }

        return positions.get(track) ?? -1;
    };

    return data.sort((a, b) => {
        const aa = a.album;
        const ba = b.album;

        // Don't change the sort when the two tracks are from separate albums.
        // This function doesn't change the order of albums or try to "merge"
        // two separated chunks of tracks from the same album together.
        if (aa !== ba) {
            return 0;
        }

        // Don't change the sort when only one (or neither) item is actually
        // a track (i.e. has an album).
        if (!aa || !ba) {
            return 0;
        }

        const ai = getPosition(aa, a);
        const bi = getPosition(ba, b);

        // There's no reason this two-way reference (a track's album and the
        // album's track list) should be broken, but if for any reason it is,
        // don't change the sort.
        if (ai === -1 || bi === -1) {
            return 0;
        }

        return ai - bi;
    });
}

// Note that this function only checks constructor equality, not inheritance!
// So it won't group subclasses together (as though they were the same type).
// Mutates data and returns it, like the other component sort functions.
// Items whose constructor is listed in thingConstructors are ordered by the
// position of that constructor in the list; items whose constructor isn't
// listed are placed after all listed ones and compare as equal to each other.
export function sortByThingType(data, thingConstructors) {
    return data.sort((a, b) => {
        const ai = thingConstructors.indexOf(a.constructor);
        const bi = thingConstructors.indexOf(b.constructor);

        if (ai >= 0 && bi >= 0) {
            return ai - bi;
        } else if (ai >= 0) {
            return -1;
        } else if (bi >= 0) {
            return 1;
        } else {
            return 0;
        }
    });
}

// Composite sorting functions - these consider multiple properties, generally
// always returning the same output regardless of how the input was originally
// sorted (or left unsorted). If you're working with arbitrarily sorted inputs
// (typically wiki data, either in full or unsorted filter), these make sure
// what gets put on the actual website (or wherever) is deterministic. Also
// they're just handy sorting utilities.
//
// Note that because these are each comprised of multiple component sorting
// functions, they expect more than just one property to be present for full
// sorting (listed above each function). If you're mapping thing objects to
// another representation, try to include all of these listed properties.

// Expects thing properties:
//  * directory (or override getDirectory)
//  * name (or override getName)
//
// Sorts by directory first and then by name, so name is the primary order
// and directory only decides between items whose names compare as equal.
// Mutates and returns data.
export function sortAlphabetically(data, {getDirectory, getName} = {}) {
    sortByDirectory(data, {getDirectory});
    sortByName(data, {getName});
    return data;
}

// Expects thing properties:
//  * directory (or override getDirectory)
//  * name (or override getName)
//  * date (or override getDate)
//
// Sorts alphabetically first and then by date, so date is the primary order
// and the alphabetical sort only decides between items whose dates compare
// as equal. Mutates and returns data.
export function sortChronologically(data, {getDirectory, getName, getDate} = {}) {
    sortAlphabetically(data, {getDirectory, getName});
    sortByDate(data, {getDate});
    return data;
}

// Highly contextual sort functions - these are only for very specific types
// of Things, and have appropriately hard-coded behavior.

// Sorts so that tracks from the same album are generally grouped together in
// their original (album track list) order, while prioritizing date (by default
// release date but can be overridden) above all else.
//
// This function also works for data lists which contain only tracks.
export function sortAlbumsTracksChronologically(data, {getDate} = {}) {
    // A track is filed under the directory of the album it belongs to;
    // anything without an album is filed under its own directory.
    const groupingDirectory = thing =>
        (thing.album ? thing.album.directory : thing.directory);

    // Each step below reorders data in place, from the least to the most
    // significant criterion: albums before tracks, then grouped by album,
    // then by position within the album...
    sortByThingType(data, [Album, Track]);
    sortByDirectory(data, {getDirectory: groupingDirectory});
    sortByPositionInAlbum(data);

    // ...and date last, so it wins over everything else. Tracks from several
    // albums released on the same date stay grouped by album, and tracks
    // within one album keep their track-list order.
    sortByDate(data, {getDate});

    return data;
}

// Specific data utilities

// Resolves the path of a flash's cover art through the provided `to` function.
export function getFlashCover(flash, {to}) {
    const {directory, coverArtFileExtension} = flash;
    return to('media.flashArt', directory, coverArtFileExtension);
}