« get me outta code hell

data: move sorting & generic functions out of #wiki-data - hsmusic-wiki - HSMusic - static wiki software cataloguing collaborative creation
about summary refs log tree commit diff
path: root/src/util/wiki-data.js
diff options
context:
space:
mode:
author(quasar) nebula <qznebula@protonmail.com>2024-02-16 10:16:36 -0400
committer(quasar) nebula <qznebula@protonmail.com>2024-02-16 10:29:50 -0400
commit5457b88f8d3d234af0b08d15f3c6249f6649aac3 (patch)
treec07cee2cbfcd35d8b4a373915f59e6b619531241 /src/util/wiki-data.js
parent7a3038b982c4d318e146698527070f096591aa4f (diff)
data: move sorting & generic functions out of #wiki-data
Diffstat (limited to 'src/util/wiki-data.js')
-rw-r--r--src/util/wiki-data.js619
1 files changed, 2 insertions, 617 deletions
diff --git a/src/util/wiki-data.js b/src/util/wiki-data.js
index cdbffff..89c04a2 100644
--- a/src/util/wiki-data.js
+++ b/src/util/wiki-data.js
@@ -1,6 +1,7 @@
 // Utility functions for interacting with wiki data.
 
-import {accumulateSum, empty, unique} from './sugar.js';
+import {accumulateSum, empty} from './sugar.js';
+import {sortByDate} from './sort.js';
 
 // Generic value operations
 
@@ -15,622 +16,6 @@ export function getKebabCase(name) {
     .toLowerCase();
 }
 
-export function chunkByConditions(array, conditions) {
-  if (empty(array)) {
-    return [];
-  }
-
-  if (empty(conditions)) {
-    return [array];
-  }
-
-  const out = [];
-  let cur = [array[0]];
-  for (let i = 1; i < array.length; i++) {
-    const item = array[i];
-    const prev = array[i - 1];
-    let chunk = false;
-    for (const condition of conditions) {
-      if (condition(item, prev)) {
-        chunk = true;
-        break;
-      }
-    }
-    if (chunk) {
-      out.push(cur);
-      cur = [item];
-    } else {
-      cur.push(item);
-    }
-  }
-  out.push(cur);
-  return out;
-}
-
-export function chunkByProperties(array, properties) {
-  return chunkByConditions(
-    array,
-    properties.map((p) => (a, b) => {
-      if (a[p] instanceof Date && b[p] instanceof Date) return +a[p] !== +b[p];
-
-      if (a[p] !== b[p]) return true;
-
-      // Not sure if this line is still necessary with the specific check for
-      // d8tes a8ove, 8ut, uh, keeping it anyway, just in case....?
-      if (a[p] != b[p]) return true;
-
-      return false;
-    })
-  ).map((chunk) => ({
-    ...Object.fromEntries(properties.map((p) => [p, chunk[0][p]])),
-    chunk,
-  }));
-}
-
-export function chunkMultipleArrays(...args) {
-  const arrays = args.slice(0, -1);
-  const fn = args.at(-1);
-
-  if (arrays[0].length === 0) {
-    return [];
-  }
-
-  const newChunk = index => arrays.map(array => [array[index]]);
-  const results = [newChunk(0)];
-
-  for (let i = 1; i < arrays[0].length; i++) {
-    const current = results.at(-1);
-
-    const args = [];
-    for (let j = 0; j < arrays.length; j++) {
-      const item = arrays[j][i];
-      const previous = current[j].at(-1);
-      args.push(item, previous);
-    }
-
-    if (fn(...args)) {
-      results.push(newChunk(i));
-      continue;
-    }
-
-    for (let j = 0; j < arrays.length; j++) {
-      current[j].push(arrays[j][i]);
-    }
-  }
-
-  return results;
-}
-
-// Sorting functions - all utils here are mutating, so make sure to initially
-// slice/filter/somehow generate a new array from input data if retaining the
-// initial sort matters! (Spoilers: If what you're doing involves any kind of
-// parallelization, it definitely matters.)
-
-// General sorting utilities! These don't do any sorting on their own but are
-// handy in the sorting functions below (or if you're making your own sort).
-
-export function compareCaseLessSensitive(a, b) {
-  // Compare two strings without considering capitalization... unless they
-  // happen to be the same that way.
-
-  const al = a.toLowerCase();
-  const bl = b.toLowerCase();
-
-  return al === bl
-    ? a.localeCompare(b, undefined, {numeric: true})
-    : al.localeCompare(bl, undefined, {numeric: true});
-}
-
-// Subtract common prefixes and other characters which some people don't like
-// to have considered while sorting. The words part of this is English-only for
-// now, which is totally evil.
-export function normalizeName(s) {
-  // Turn (some) ligatures into expanded variant for cleaner sorting, e.g.
-  // "ff" into "ff", in decompose mode, so that "ü" is represented as two
-  // bytes ("u" + \u0308 combining diaeresis).
-  s = s.normalize('NFKD');
-
-  // Replace one or more whitespace of any kind in a row, as well as certain
-  // punctuation, with a single typical space, then trim the ends.
-  s = s
-    .replace(
-      /[\p{Separator}\p{Dash_Punctuation}\p{Connector_Punctuation}]+/gu,
-      ' '
-    )
-    .trim();
-
-  // Discard anything that isn't a letter, number, or space.
-  s = s.replace(/[^\p{Letter}\p{Number} ]/gu, '').trim();
-
-  // Remove common English (only, for now) prefixes.
-  s = s.replace(/^(?:an?|the) /i, '');
-
-  return s;
-}
-
-// Sorts multiple arrays by an arbitrary function (which is the last argument).
-// Paired values from each array are provided to the callback sequentially:
-//
-//   (a_fromFirstArray, b_fromFirstArray,
-//    a_fromSecondArray, b_fromSecondArray,
-//    a_fromThirdArray, b_fromThirdArray) =>
-//     relative positioning (negative, positive, or zero)
-//
-// Like native single-array sort, this is a mutating function.
-export function sortMultipleArrays(...args) {
-  const arrays = args.slice(0, -1);
-  const fn = args.at(-1);
-
-  const length = arrays[0].length;
-  const symbols = new Array(length).fill(null).map(() => Symbol());
-  const indexes = Object.fromEntries(symbols.map((symbol, index) => [symbol, index]));
-
-  symbols.sort((a, b) => {
-    const indexA = indexes[a];
-    const indexB = indexes[b];
-
-    const args = [];
-    for (let i = 0; i < arrays.length; i++) {
-      args.push(arrays[i][indexA]);
-      args.push(arrays[i][indexB]);
-    }
-
-    return fn(...args);
-  });
-
-  for (const array of arrays) {
-    // Note: We're mutating this array pulling values from itself, but only all
-    // at once after all those values have been pulled.
-    array.splice(0, array.length, ...symbols.map(symbol => array[indexes[symbol]]));
-  }
-
-  return arrays;
-}
-
-// Filters multiple arrays by an arbitrary function (which is the last argument).
-// Values from each array are provided to the callback sequentially:
-//
-//   (value_fromFirstArray,
-//    value_fromSecondArray,
-//    value_fromThirdArray,
-//    index,
-//    [firstArray, secondArray, thirdArray]) =>
-//      true or false
-//
-// Please be aware that this is a mutating function, unlike native single-array
-// filter. The mutated arrays are returned. Also attached under `.removed` are
-// corresponding arrays of items filtered out.
-export function filterMultipleArrays(...args) {
-  const arrays = args.slice(0, -1);
-  const fn = args.at(-1);
-
-  const removed = new Array(arrays.length).fill(null).map(() => []);
-
-  for (let i = arrays[0].length - 1; i >= 0; i--) {
-    const args = arrays.map(array => array[i]);
-    args.push(i, arrays);
-
-    if (!fn(...args)) {
-      for (let j = 0; j < arrays.length; j++) {
-        const item = arrays[j][i];
-        arrays[j].splice(i, 1);
-        removed[j].unshift(item);
-      }
-    }
-  }
-
-  Object.assign(arrays, {removed});
-  return arrays;
-}
-
-// Reduces multiple arrays with an arbitrary function (which is the last
-// argument). Note that this reduces into multiple accumulators, one for
-// each input array, not just a single value. That's reflected in both the
-// callback parameters:
-//
-//   (accumulator1,
-//    accumulator2,
-//    value_fromFirstArray,
-//    value_fromSecondArray,
-//    index,
-//    [firstArray, secondArray]) =>
-//      [newAccumulator1, newAccumulator2]
-//
-// As well as the final return value of reduceMultipleArrays:
-//
-//   [finalAccumulator1, finalAccumulator2]
-//
-// This is not a mutating function.
-export function reduceMultipleArrays(...args) {
-  const [arrays, fn, initialAccumulators] =
-    (typeof args.at(-1) === 'function'
-      ? [args.slice(0, -1), args.at(-1), null]
-      : [args.slice(0, -2), args.at(-2), args.at(-1)]);
-
-  if (empty(arrays[0])) {
-    throw new TypeError(`Reduce of empty arrays with no initial value`);
-  }
-
-  let [accumulators, i] =
-    (initialAccumulators
-      ? [initialAccumulators, 0]
-      : [arrays.map(array => array[0]), 1]);
-
-  for (; i < arrays[0].length; i++) {
-    const args = [...accumulators, ...arrays.map(array => array[i])];
-    args.push(i, arrays);
-    accumulators = fn(...args);
-  }
-
-  return accumulators;
-}
-
-// Component sort functions - these sort by one particular property, applying
-// unique particulars where appropriate. Usually you don't want to use these
-// directly, but if you're making a custom sort they can come in handy.
-
-// Universal method for sorting things into a predictable order, as directory
-// is taken to be unique. There are two exceptions where this function (and
-// thus any of the composite functions that start with it) *can't* be taken as
-// deterministic:
-//
-//  1) Mixed data of two different Things, as directories are only taken as
-//     unique within one given class of Things. For example, this function
-//     won't be deterministic if its array contains both <album:ithaca> and
-//     <track:ithaca>.
-//
-//  2) Duplicate directories, or multiple instances of the "same" Thing.
-//     This function doesn't differentiate between two objects of the same
-//     directory, regardless of any other properties or the overall "identity"
-//     of the object.
-//
-// These exceptions are unavoidable except for not providing that kind of data
-// in the first place, but you can still ensure the overall program output is
-// deterministic by ensuring the input is arbitrarily sorted according to some
-// other criteria - ex, although sortByDirectory itself isn't determinstic when
-// given mixed track and album data, the final output (what goes on the site)
-// will always be the same if you're doing sortByDirectory([...albumData,
-// ...trackData]), because the initial sort places albums before tracks - and
-// sortByDirectory will handle the rest, given all directories are unique
-// except when album and track directories overlap with each other.
-export function sortByDirectory(data, {
-  getDirectory = object => object.directory,
-} = {}) {
-  const directories = data.map(getDirectory);
-
-  sortMultipleArrays(data, directories,
-    (a, b, directoryA, directoryB) =>
-      compareCaseLessSensitive(directoryA, directoryB));
-
-  return data;
-}
-
-export function sortByName(data, {
-  getName = object => object.name,
-} = {}) {
-  const names = data.map(getName);
-  const normalizedNames = names.map(normalizeName);
-
-  sortMultipleArrays(data, normalizedNames, names,
-    (
-      a, b,
-      normalizedA, normalizedB,
-      nonNormalizedA, nonNormalizedB,
-    ) =>
-      compareNormalizedNames(
-        normalizedA, normalizedB,
-        nonNormalizedA, nonNormalizedB,
-      ));
-
-  return data;
-}
-
-export function compareNormalizedNames(
-  normalizedA, normalizedB,
-  nonNormalizedA, nonNormalizedB,
-) {
-  const comparison = compareCaseLessSensitive(normalizedA, normalizedB);
-  return (
-    (comparison === 0
-      ? compareCaseLessSensitive(nonNormalizedA, nonNormalizedB)
-      : comparison));
-}
-
-export function sortByDate(data, {
-  getDate = object => object.date,
-  latestFirst = false,
-} = {}) {
-  const dates = data.map(getDate);
-
-  sortMultipleArrays(data, dates,
-    (a, b, dateA, dateB) =>
-      compareDates(dateA, dateB, {latestFirst}));
-
-  return data;
-}
-
-export function compareDates(a, b, {
-  latestFirst = false,
-} = {}) {
-  if (a && b) {
-    return (latestFirst ? b - a : a - b);
-  }
-
-  // It's possible for objects with and without dates to be mixed
-  // together in the same array. If that's the case, we put all items
-  // without dates at the end.
-  if (a) return -1;
-  if (b) return 1;
-
-  // If neither of the items being compared have a date, don't move
-  // them relative to each other. This is basically the same as
-  // filtering out all non-date items and then pushing them at the
-  // end after sorting the rest.
-  return 0;
-}
-
-export function getLatestDate(dates) {
-  const filtered = dates.filter(Boolean);
-  if (empty(filtered)) return null;
-
-  return filtered
-    .reduce(
-      (accumulator, date) =>
-        date > accumulator ? date : accumulator,
-      -Infinity);
-}
-
-export function getEarliestDate(dates) {
-  const filtered = dates.filter(Boolean);
-  if (empty(filtered)) return null;
-
-  return filtered
-    .reduce(
-      (accumulator, date) =>
-        date < accumulator ? date : accumulator,
-      Infinity);
-}
-
-// Funky sort which takes a data set and a corresponding list of "counts",
-// which are really arbitrary numbers representing some property of each data
-// object defined by the caller. It sorts and mutates *both* of these, so the
-// sorted data will still correspond to the same indexed count.
-export function sortByCount(data, counts, {
-  greatestFirst = false,
-} = {}) {
-  sortMultipleArrays(data, counts, (data1, data2, count1, count2) =>
-    (greatestFirst
-      ? count2 - count1
-      : count1 - count2));
-
-  return data;
-}
-
-// Corresponding filter function for the above sort. By default, items whose
-// corresponding count is zero will be removed.
-export function filterByCount(data, counts, {
-  min = 1,
-  max = Infinity,
-} = {}) {
-  filterMultipleArrays(data, counts, (data, count) =>
-    count >= min && count <= max);
-}
-
-export function sortByPositionInParent(data, {
-  getParent,
-  getChildren,
-}) {
-  return data.sort((a, b) => {
-    const parentA = getParent(a);
-    const parentB = getParent(b);
-
-    // Don't change the sort when the two items are from separate parents.
-    // This function doesn't change the order of parents or try to "merge"
-    // two separated chunks of items from the same parent together.
-    if (parentA !== parentB) {
-      return 0;
-    }
-
-    // Don't change the sort when either (or both) of the items doesn't
-    // even have a parent (e.g. it's the passed data is a mixed array of
-    // children and parents).
-    if (!parentA || !parentB) {
-      return 0;
-    }
-
-    const indexA = getChildren(parentA).indexOf(a);
-    const indexB = getChildren(parentB).indexOf(b);
-
-    // If the getParent/getChildren relationship doesn't go both ways for
-    // some reason, don't change the sort.
-    if (indexA === -1 || indexB === -1) {
-      return 0;
-    }
-
-    return indexA - indexB;
-  });
-}
-
-export function sortByPositionInAlbum(data) {
-  return sortByPositionInParent(data, {
-    getParent: track => track.album,
-    getChildren: album => album.tracks,
-  });
-}
-
-export function sortByPositionInFlashAct(data) {
-  return sortByPositionInParent(data, {
-    getParent: flash => flash.act,
-    getChildren: act => act.flashes,
-  });
-}
-
-// Sorts data so that items are grouped together according to whichever of a
-// set of arbitrary given conditions is true first. If no conditions are met
-// for a given item, it's moved over to the end!
-export function sortByConditions(data, conditions) {
-  return data.sort((a, b) => {
-    const ai = conditions.findIndex((f) => f(a));
-    const bi = conditions.findIndex((f) => f(b));
-
-    if (ai >= 0 && bi >= 0) {
-      return ai - bi;
-    } else if (ai >= 0) {
-      return -1;
-    } else if (bi >= 0) {
-      return 1;
-    } else {
-      return 0;
-    }
-  });
-}
-
-// Composite sorting functions - these consider multiple properties, generally
-// always returning the same output regardless of how the input was originally
-// sorted (or left unsorted). If you're working with arbitrarily sorted inputs
-// (typically wiki data, either in full or unsorted filter), these make sure
-// what gets put on the actual website (or wherever) is deterministic. Also
-// they're just handy sorting utilities.
-//
-// Note that because these are each comprised of multiple component sorting
-// functions, they expect more than just one property to be present for full
-// sorting (listed above each function). If you're mapping thing objects to
-// another representation, try to include all of these listed properties.
-
-// Expects thing properties:
-//  * directory (or override getDirectory)
-//  * name (or override getName)
-export function sortAlphabetically(data, {
-  getDirectory,
-  getName,
-} = {}) {
-  sortByDirectory(data, {getDirectory});
-  sortByName(data, {getName});
-  return data;
-}
-
-// Expects thing properties:
-//  * directory (or override getDirectory)
-//  * name (or override getName)
-//  * date (or override getDate)
-export function sortChronologically(data, {
-  latestFirst = false,
-  getDirectory,
-  getName,
-  getDate,
-} = {}) {
-  sortAlphabetically(data, {getDirectory, getName});
-  sortByDate(data, {latestFirst, getDate});
-  return data;
-}
-
-// This one's a little odd! Sorts an array of {entry, thing} pairs using
-// the provided sortFunction, which will operate on each item's `thing`, not
-// its entry (or the item as a whole). If multiple entries are associated
-// with the same thing, they'll end up bunched together in the output,
-// retaining their original relative positioning.
-export function sortEntryThingPairs(data, sortFunction) {
-  const things = unique(data.map(item => item.thing));
-  sortFunction(things);
-
-  const outputArrays = [];
-  const thingToOutputArray = new Map();
-
-  for (const thing of things) {
-    const array = [];
-    thingToOutputArray.set(thing, array);
-    outputArrays.push(array);
-  }
-
-  for (const item of data) {
-    thingToOutputArray.get(item.thing).push(item);
-  }
-
-  data.splice(0, data.length, ...outputArrays.flat());
-
-  return data;
-}
-
-/*
-// Alternate draft version of sortEntryThingPairs.
-// See: https://github.com/hsmusic/hsmusic-wiki/issues/90#issuecomment-1607412168
-
-// Maps the provided "preparation" function across a list of arbitrary values,
-// building up a list of sortable values; sorts these with the provided sorting
-// function; and reorders the sources to match their corresponding prepared
-// values. As usual, if multiple source items correspond to the same sorting
-// data, this retains the source relative positioning.
-export function prepareAndSort(sources, prepareForSort, sortFunction) {
-  const prepared = [];
-  const preparedToSource = new Map();
-
-  for (const original of originals) {
-    const prep = prepareForSort(source);
-    prepared.push(prep);
-    preparedToSource.set(prep, source);
-  }
-
-  sortFunction(prepared);
-
-  sources.splice(0, ...sources.length, prepared.map(prep => preparedToSource.get(prep)));
-
-  return sources;
-}
-*/
-
-// Highly contextual sort functions - these are only for very specific types
-// of Things, and have appropriately hard-coded behavior.
-
-// Sorts so that tracks from the same album are generally grouped together in
-// their original (album track list) order, while prioritizing date (by default
-// release date but can be overridden) above all else.
-//
-// This function also works for data lists which contain only tracks.
-export function sortAlbumsTracksChronologically(data, {
-  latestFirst = false,
-  getDate,
-} = {}) {
-  // Sort albums before tracks...
-  sortByConditions(data, [(t) => t.album === undefined]);
-
-  // Group tracks by album...
-  sortByDirectory(data, {
-    getDirectory: (t) => (t.album ? t.album.directory : t.directory),
-  });
-
-  // Sort tracks by position in album...
-  sortByPositionInAlbum(data);
-
-  // ...and finally sort by date. If tracks from more than one album were
-  // released on the same date, they'll still be grouped together by album,
-  // and tracks within an album will retain their relative positioning (i.e.
-  // stay in the same order as part of the album's track listing).
-  sortByDate(data, {latestFirst, getDate});
-
-  return data;
-}
-
-export function sortFlashesChronologically(data, {
-  latestFirst = false,
-  getDate,
-} = {}) {
-  // Group flashes by act...
-  sortByDirectory(data, {
-    getDirectory: flash => flash.act.directory,
-  });
-
-  // Sort flashes by position in act...
-  sortByPositionInFlashAct(data);
-
-  // ...and finally sort by date. If flashes from more than one act were
-  // released on the same date, they'll still be grouped together by act,
-  // and flashes within an act will retain their relative positioning (i.e.
-  // stay in the same order as the act's flash listing).
-  sortByDate(data, {latestFirst, getDate});
-
-  return data;
-}
-
 // Specific data utilities
 
 // Matches heading details from commentary data in roughly the formats: