« get me outta code hell

various sorting improvements across the board - hsmusic-wiki - HSMusic - static wiki software cataloguing collaborative creation
about summary refs log tree commit diff
path: root/src/util/wiki-data.js
diff options
context:
space:
mode:
author(quasar) nebula <qznebula@protonmail.com>2022-06-10 17:07:01 -0300
committer(quasar) nebula <qznebula@protonmail.com>2022-06-10 17:07:01 -0300
commit531219642b93a5dc236f217d22f024fb2707191f (patch)
tree644449168c11152546f883bf4122a97fd17d0d16 /src/util/wiki-data.js
parent6e430bcdf251fbc8cbcfd4c48a8bbc1bf134120f (diff)
various sorting improvements across the board
- several bugs introduced during data restructure fixed
- a few areas where sorting functions were present but completely ineffective
    (albeit harmlessly so) fixed
- new composite sorting utils are more deterministic by considering multiple
    factors instead of e.g. only name, only date
- component sorting functions can be combined and pay more attention to
    determinism and only moving items when it's covered by the scope of the
    component (i.e. they can more readily be composited / used together)
- names are normalized in a more particular manner (for alphabetical sorting)
    - accents are stripped, "ü" sorts as "u"
    - punctuation is stripped, "dave's" sorts as "daves"
    - (some) diacritics are expanded, "ff" sorts as "ff"
    - any sequential whitespace treated as one typical space
    - whitespace is trimmed from start and end
    - english prefix "a" / "an" stripped in addition to "the"
- alphabetical sorting uses localeCompare and considers numeric strings more
    naturally, "Homestuck Vol. 10" sorts after "Homestuck Vol. 5"
Diffstat (limited to 'src/util/wiki-data.js')
-rw-r--r--src/util/wiki-data.js242
1 files changed, 213 insertions, 29 deletions
diff --git a/src/util/wiki-data.js b/src/util/wiki-data.js
index b4f7f21..aba508c 100644
--- a/src/util/wiki-data.js
+++ b/src/util/wiki-data.js
@@ -1,5 +1,7 @@
 // Utility functions for interacting with wiki data.
 
+import { Album, Track } from '../data/things.js';
+
 // Generic value operations
 
 export function getKebabCase(name) {
@@ -62,32 +64,115 @@ export function chunkByProperties(array, properties) {
         }));
 }
 
-// Sorting functions
+// Sorting functions - all utils here are mutating, so make sure to initially
+// slice/filter/somehow generate a new array from input data if retaining the
+// initial sort matters! (Spoilers: If what you're doing involves any kind of
+// parallelization, it definitely matters.)
+
+// General sorting utilities! These don't do any sorting on their own but are
+// handy in the sorting functions below (or if you're making your own sort).
+
+export function compareCaseLessSensitive(a, b) {
+    // Compare two strings without considering capitalization... unless they
+    // happen to be the same that way.
+
+    const al = a.toLowerCase();
+    const bl = b.toLowerCase();
+
+    return (al === bl
+        ? a.localeCompare(b, undefined, {numeric: true})
+        : al.localeCompare(bl, undefined, {numeric: true}));
+}
+
+// Subtract common prefixes and other characters which some people don't like
+// to have considered while sorting. The words part of this is English-only for
+// now, which is totally evil.
+export function normalizeName(s) {
+    // Turn (some) ligatures into expanded variant for cleaner sorting, e.g.
+    // "ff" into "ff", in decompose mode, so that "ü" is represented as two
+    // bytes ("u" + \u0308 combining diaeresis).
+    s = s.normalize('NFKD');
 
-export function sortByName(a, b) {
-    let an = a.name.toLowerCase();
-    let bn = b.name.toLowerCase();
-    if (an.startsWith('the ')) an = an.slice(4);
-    if (bn.startsWith('the ')) bn = bn.slice(4);
-    return an < bn ? -1 : an > bn ? 1 : 0;
+    // Replace one or more whitespace of any kind in a row, as well as certain
+    // punctuation, with a single typical space, then trim the ends.
+    s = s.replace(/[\p{Separator}\p{Dash_Punctuation}\p{Connector_Punctuation}]+/gu, ' ').trim();
+
+    // Discard anything that isn't a letter, number, or space.
+    s = s.replace(/[^\p{Letter}\p{Number} ]/gu, '');
+
+    // Remove common English (only, for now) prefixes.
+    s = s.replace(/^(?:an?|the) /i, '');
+
+    return s;
+}
+
+// Component sort functions - these sort by one particular property, applying
+// unique particulars where appropriate. Usually you don't want to use these
+// directly, but if you're making a custom sort they can come in handy.
+
+// Universal method for sorting things into a predictable order, as directory
+// is taken to be unique. There are two exceptions where this function (and
+// thus any of the composite functions that start with it) *can't* be taken as
+// deterministic:
+//
+//  1) Mixed data of two different Things, as directories are only taken as
+//     unique within one given class of Things. For example, this function
+//     won't be deterministic if its array contains both <album:ithaca> and
+//     <track:ithaca>.
+//
+//  2) Duplicate directories, or multiple instances of the "same" Thing.
+//     This function doesn't differentiate between two objects of the same
+//     directory, regardless of any other properties or the overall "identity"
+//     of the object.
+//
+// These exceptions are unavoidable except for not providing that kind of data
+// in the first place, but you can still ensure the overall program output is
+// deterministic by ensuring the input is arbitrarily sorted according to some
+// other criteria - ex, although sortByDirectory itself isn't determinstic when
+// given mixed track and album data, the final output (what goes on the site)
+// will always be the same if you're doing sortByDirectory([...albumData,
+// ...trackData]), because the initial sort places albums before tracks - and
+// sortByDirectory will handle the rest, given all directories are unique
+// except when album and track directories overlap with each other.
+export function sortByDirectory(data, {
+    getDirectory = o => o.directory
+} = {}) {
+    return data.sort((a, b) => {
+        const ad = getDirectory(a);
+        const bd = getDirectory(b);
+        return compareCaseLessSensitive(ad, bd)
+    });
 }
 
-// This function was originally made to sort just al8um data, 8ut its exact
-// code works fine for sorting tracks too, so I made the varia8les and names
-// more general.
-export function sortByDate(data, dateKey = 'date') {
-    // Just to 8e clear: sort is a mutating function! I only return the array
-    // 8ecause then you don't have to define it as a separate varia8le 8efore
-    // passing it into this function.
-    return data.sort(({ [dateKey]: a }, { [dateKey]: b }) => {
+export function sortByName(data, {
+    getName = o => o.name
+} = {}) {
+    return data.sort((a, b) => {
+        const an = getName(a);
+        const bn = getName(b);
+        const ann = normalizeName(an);
+        const bnn = normalizeName(bn);
+        return (
+            compareCaseLessSensitive(ann, bnn) ||
+            compareCaseLessSensitive(an, bn));
+    });
+}
+
+export function sortByDate(data, {
+    getDate = o => o.date
+} = {}) {
+    return data.sort((a, b) => {
+        const ad = getDate(a);
+        const bd = getDate(b);
+
         // It's possible for objects with and without dates to be mixed
         // together in the same array. If that's the case, we put all items
         // without dates at the end.
-        if (a && b) {
-            return a - b;
-        } else if (a) {
+        if (ad && bd) {
+            return ad - bd;
+        } else if (ad) {
             return -1;
-        } else if (b) {
+        } else if (bd) {
             return 1;
         } else {
             // If neither of the items being compared have a date, don't move
@@ -99,9 +184,115 @@ export function sortByDate(data, dateKey = 'date') {
     });
 }
 
-// Same details as the sortByDate, 8ut for covers~
-export function sortByArtDate(data) {
-    return data.sort((a, b) => (a.coverArtDate || a.date) - (b.coverArtDate || b.date));
+export function sortByPositionInAlbum(data) {
+    return data.sort((a, b) => {
+        const aa = a.album;
+        const ba = b.album;
+
+        // Don't change the sort when the two tracks are from separate albums.
+        // This function doesn't change the order of albums or try to "merge"
+        // two separated chunks of tracks from the same album together.
+        if (aa !== ba) {
+            return 0;
+        }
+
+        // Don't change the sort when only one (or neither) item is actually
+        // a track (i.e. has an album).
+        if (!aa || !ba) {
+            return 0;
+        }
+
+        const ai = aa.tracks.indexOf(a);
+        const bi = ba.tracks.indexOf(b);
+
+        // There's no reason this two-way reference (a track's album and the
+        // album's track list) should be broken, but if for any reason it is,
+        // don't change the sort.
+        if (ai === -1 || bi === -1) {
+            return 0;
+        }
+
+        return ai - bi;
+    });
+}
+
+// Note that this function only checks constructor equality, not inheritence!
+// So it won't group subclasses together (as though they were the same type).
+export function sortByThingType(data, thingConstructors) {
+    data.sort((a, b) => {
+        const ai = thingConstructors.indexOf(a.constructor);
+        const bi = thingConstructors.indexOf(b.constructor);
+
+        if (ai >= 0 && bi >= 0) {
+            return ai - bi;
+        } else if (ai >= 0) {
+            return -1;
+        } else if (bi >= 0) {
+            return 1;
+        } else {
+            return 0;
+        }
+    });
+}
+
+// Composite sorting functions - these consider multiple properties, generally
+// always returning the same output regardless of how the input was originally
+// sorted (or left unsorted). If you're working with arbitrarily sorted inputs
+// (typically wiki data, either in full or unsorted filter), these make sure
+// what gets put on the actual website (or wherever) is deterministic. Also
+// they're just handy sorting utilities.
+//
+// Note that because these are each comprised of multiple component sorting
+// functions, they expect more than just one property to be present for full
+// sorting (listed above each function). If you're mapping thing objects to
+// another representation, try to include all of these listed properties.
+
+// Expects thing properties:
+//  * directory (or override getDirectory)
+//  * name (or override getName)
+export function sortAlphabetically(data, {getDirectory, getName} = {}) {
+    sortByDirectory(data, {getDirectory});
+    sortByName(data, {getName});
+    return data;
+}
+
+// Expects thing properties:
+//  * directory (or override getDirectory)
+//  * name (or override getName)
+//  * date (or override getDate)
+export function sortChronologically(data, {getDirectory, getName, getDate} = {}) {
+    sortAlphabetically(data, {getDirectory, getName});
+    sortByDate(data, {getDate});
+    return data;
+}
+
+// Highly contextual sort functions - these are only for very specific types
+// of Things, and have appropriately hard-coded behavior.
+
+// Sorts so that tracks from the same album are generally grouped together in
+// their original (album track list) order, while prioritizing date (by default
+// release date but can be overridden) above all else.
+//
+// This function also works for data lists which contain only tracks.
+export function sortAlbumsTracksChronologically(data, {getDate} = {}) {
+    // Sort albums before tracks...
+    sortByThingType(data, [Album, Track]);
+
+    // Group tracks by album...
+    sortByDirectory(data, {
+        getDirectory: t => (t.album ? t.album.directory : t.directory)
+    });
+
+    // Sort tracks by position in album...
+    sortByPositionInAlbum(data);
+
+    // ...and finally sort by date. If tracks from more than one album were
+    // released on the same date, they'll still be grouped together by album,
+    // and tracks within an album will retain their relative positioning (i.e.
+    // stay in the same order as part of the album's track listing).
+    sortByDate(data, {getDate});
+
+    return data;
 }
 
 // Specific data utilities
@@ -152,13 +343,6 @@ export function getArtistNumContributions(artist) {
     );
 }
 
-export function getArtistCommentary(artist, {justEverythingMan}) {
-    return justEverythingMan.filter(thing =>
-        (thing?.commentary
-            .replace(/<\/?b>/g, '')
-            .includes('<i>' + artist.name + ':</i>')));
-}
-
 export function getFlashCover(flash, {to}) {
     return to('media.flashArt', flash.directory, flash.coverArtFileExtension);
 }