1 files changed, 331 insertions, 10 deletions
diff --git a/playlist-utils.js b/playlist-utils.js
index 1015748..979c6d6 100644
--- a/playlist-utils.js
+++ b/playlist-utils.js
@@ -166,15 +166,30 @@ function flattenGrouplike(grouplike) {
   // levels in the group tree and returns them as a new group containing those
   // tracks.
 
-  return {
-    items: grouplike.items.map(item => {
-      if (isGroup(item)) {
-        return flattenGrouplike(item).items
-      } else {
-        return [item]
-      }
-    }).reduce((a, b) => a.concat(b), [])
-  }
+  return {items: getFlatTrackList(grouplike)}
+}
+
+function getFlatTrackList(grouplike) {
+  // Underlying function for flattenGrouplike. Can be used if you just want to
+  // get an array and not a grouplike, too.
+
+  return grouplike.items.map(item => {
+    if (isGroup(item)) {
+      return getFlatTrackList(item)
+    } else {
+      return [item]
+    }
+  }).reduce((a, b) => a.concat(b), [])
+}
+
+function getFlatGroupList(grouplike) {
+  // Analogue of getFlatTrackList for groups instead of tracks. Returns a flat
+  // array of all the groups in each level of the provided grouplike.
+
+  return grouplike.items
+    .filter(isGroup)
+    .map(item => [item, ...getFlatGroupList(item)])
+    .reduce((a, b) => a.concat(b), [])
 }
 
 function countTotalTracks(item) {
@@ -692,12 +707,285 @@ function getCorrespondingPlayableForFile(item) {
   return parent.items.find(item => isPlayable(item) && path.basename(item.url, path.extname(item.url)) === basename)
 }
 
+function getPathScore(path1, path2) {
+  // This function is basically only used in findTrackObject, but it's kinda
+  // huge and I need to test that it works outside of that context, so I'm
+  // sticking it on the global scope. Feel free to steal for whatever your
+  // weird future need for comparing any two paths is!
+  //
+  // path1 and path2 should be arrays of group names, according to the path
+  // you'd follow to open the groups and access a contained track. They should
+  // *not* include the track name, unless you want those to be considered a
+  // valid place for the paths to cross over!
+  //
+  // --
+  //
+  // A path score is determined to be the number of groups which must be
+  // traversed across the two paths to find a matching group name and then
+  // reach the other track under that group. A lower score implies a closer
+  // match (since score increases not with "closeness" but "separation").
+  //
+  // For example, these two paths are considered to have a score of zero
+  // against each other ("T" represents the track):
+  //
+  //   X/B/C/T
+  //   Y/B/C/T
+  //
+  // Their separation is zero because, starting from the closest (i.e. top)
+  // group to either the provided track or the reference data track, it takes
+  // zero additional steps to reach a group whose name is shared between the
+  // two paths: those top groups already have the same name.
+  //
+  // The above example indicates that the pattern before the closest matching
+  // path does not matter. Indeed, the actual length of the path could be
+  // different (W/X/B/C versus Y/B/C for example), and the score would still
+  // be the same. Parts of the path prepending the closest matching group
+  // name are thus ommitted from following examples.
+  //
+  // These paths, on the other hand, have a score of one:
+  //
+  //   (...)/C/T
+  //   (...)/C/D/T
+  //
+  // The closest matching name in this path is C. It is zero steps further
+  // from the start of the first path (C is the start); on the other path,
+  // it is one step further (D must be passed first). Therefore, the total
+  // steps that must be travelled to reach the start of one path to the
+  // start of the other by passing through the closest overlapping name is
+  // one: 0 + 1 = 1.
+  //
+  // In determining which of two paths are a closer match to a provided
+  // reference path, it's important to remember that a lower score (implying
+  // less separation) is better. Though we'll see the following example is
+  // probably more common across most music libraries, a reasonably natural
+  // example of the path structures above occurring in a music library could
+  // be this: an artist directory containing both albums and stray tracks,
+  // where one track apparently appears as both a stray track file and in an
+  // adjacent album directory; or, a mixtape which contains adjacent to its
+  // mixed-segment track listing a folder of the unmixed segments.
+  //
+  // These paths have a score of two:
+  //
+  //   (...)/B/C/T
+  //   (...)/B/D/T
+  //
+  // With the above examples, this one is fairly self explanatory. In this
+  // case, the closest matching group, B, is one step away from the start
+  // point (the first group before the track, i.e, the top name in the path)
+  // in both paths. Summed, the distance (and thus the score) is two.
+  //
+  // This example demonstrates what is probably a more realistic case of two
+  // tracks resembling each other (e.g. having the same name or source) but
+  // not sharing the same path: if B represents an artist, and C & D stand in
+  // place (in this example) of the names of that artist's albums, then it is
+  // reasonable to say the directories for the album are slightly different
+  // across the two paths. This could be the case for two users who ended up
+  // naming the album directory differently, or for one user restoring from
+  // their own backend/playlist after having adjusted the naming structure of
+  // their music library. It's also possible that there could simply be two
+  // albums by the same artist which contain a track of the same name; in
+  // that case, the path score implementation is doing exactly its job by
+  // indicating that these tracks would have a greater score (meaning further
+  // separation) than when checking against the track belonging to the same
+  // release. (If there is concern that such a track should not match at all
+  // because it may be a remarkably different track, other factors of
+  // resemblance -- position in album, duration, etc -- can be used to add
+  // detail to the apparent level of resemblance then.)
+  //
+  // --
+  //
+  // A note on determining which name is the "closest" -- consider
+  // the following two paths:
+  //
+  //   A/X/B/C/D/E/T
+  //   A/Y/E/B/C/D/T
+  //
+  // There are many names which appear in both paths. So which do we treat
+  // as the closest? Well, what we're looking for is the shortest path across
+  // both paths, passing through at a particular name. To do this, we simply
+  // calculate the score for each name in the intersection of both paths
+  // (i.e. every name which shows up in both paths) using the same algorithm
+  // described above (sum of the distance from the start of either path).
+  // Then we take the lowest resultant score, and use that as the final score
+  // which is returned out of this function.
+  //
+  // TODO: There are probably optimizations to be made as far as avoiding
+  //       processing every overlapping name goes (particularly once it's
+  //       determined that no other path could be determined), but honestly
+  //       I'm pretty sure if I tried to write an algorithm taking *that*
+  //       into account, I'd end up screwing it up. :P So for now, we just
+  //       do a simple filter and reduce operation.
+  //
+  // If the intersection of the two paths is empty (i.e. there is no overlap),
+  // we return the otherwise nonsense value, -1.
+
+  const union = Array.from(new Set([...path1, ...path2]))
+  const intersection = union.filter(
+    name => path1.includes(name) && path2.includes(name))
+
+  if (!intersection.length) {
+    return -1
+  }
+
+  const reversed1 = path1.reverse()
+  const reversed2 = path2.reverse()
+
+  const scores = intersection.map(
+    name => reversed1.indexOf(name) + reversed2.indexOf(name))
+
+  return scores.reduce((a, b) => a < b ? a : b)
+}
+
+function getNameScore(name1, name2) {
+  // Pretty simple algorithm here: we're looking for the longest continuous
+  // series of words which is shared between both names. The score is the
+  // length of that series, so a higher score is better (and a zero score
+  // means no overlap).
+
+  // TODO:
+  // This ain't perfect! Case example: User A has library structure:
+  //
+  //   Very Cool Album/
+  //     01 Beans
+  //
+  // User B has library structure:
+  //
+  //   Very Cool Album/
+  //     Very Cool Album- 01 Beans
+  //
+  // Now if user B queues 'Very Cool Album- 01 Beans', the search will match
+  // the *group* 'Very Cool Album' on User A's end, because that name has a
+  // 3-word match, in comparison to the track '01 Beans', which is only a
+  // 2-word match. Not sure what a proper solution here would be, but probably
+  // it'd involve somehow prioritizing series of words which match closer to
+  // the end!
+
+  // Split into chunks of word characters, taking out any non-word (\W)
+  // characters between.
+  const toWords = name => name.split(/\W+/)
+
+  const words1 = toWords(name1)
+  const words2 = toWords(name2)
+
+  const getLongestMatch = (parse, against) => {
+    let longestMatch = 0
+
+    for (let i = 0; i < parse.length; i++) {
+      const word = parse[i]
+
+      for (let j = 0; j < against.length; j++) {
+        if (against[j] !== word) {
+          continue
+        }
+
+        let offset = 1
+        while (
+          parse[i + offset] &&
+          against[i + offset] &&
+          parse[i + offset] === against[j + offset]
+        ) {
+          offset++
+        }
+
+        if (offset > longestMatch) {
+          longestMatch = offset
+        }
+      }
+    }
+
+    return longestMatch
+  }
+
+  return Math.max(
+    getLongestMatch(words1, words2),
+    getLongestMatch(words2, words1)
+  )
+}
+
+function findItemObject(referenceData, possibleChoices) {
+  // Finds the item object in the provided choices which most closely resembles
+  // the provided reference data. This is used for maintaining the identity of
+  // item objects when reloading a playlist (see serialized-backend.js). It's
+  // also usable in synchronizing the identity of items across linked clients
+  // (see socket.js).
+
+  // Reference data includes item NAME and item PATH (names of parent groups).
+  // Specifics of how existing item objects are determined to resemble this
+  // data are laid out next to the relevant implementation code.
+  //
+  // TODO: Should track number be considered here?
+  // TODO: Should track "metadata" (duration, md5?) be considered too?
+  //       This in particular prompts questions of what the purpose of matching
+  //       tracks *is*, and in considering those I lean towards "no" here, but
+  //       it's probably worth looking at more in the future. (TM.)
+
+  function getItemPathScore(item) {
+    if (!referenceData.path) {
+      return null
+    }
+
+    const path1 = referenceData.path.slice()
+    const path2 = getItemPath(item).slice(0, -1).map(group => group.name)
+    return getPathScore(path1, path2)
+  }
+
+  function getItemNameScore(item) {
+    const name1 = referenceData.name
+    const name2 = item.name
+    return getNameScore(name1, name2)
+  }
+
+  // The only items which will be considered at all are those which at least
+  // partially match the reference name.
+  const baselineResemble = possibleChoices.map(item => ({
+    item,
+    nameScore: getItemNameScore(item)
+  })).filter(item => item.nameScore > 0)
+
+  // If no item matches the baseline conditions for resemblance at all,
+  // return null. It's up to the caller to decide what to do in this case,
+  // e.g. reporting that no item was found, or creating a new item object
+  // from the reference data altogether.
+  if (!baselineResemble.length) {
+    return null
+  }
+
+  // Find the "reasons" these items resemble the reference data; these will
+  // be used as the factors in calculating which item resembles closest.
+  const reasons = baselineResemble.map(({item, nameScore}) => ({
+    item,
+    pathScore: getItemPathScore(item),
+    nameScore
+  }))
+
+  // TODO: Are there circumstances in which a strong path score should be
+  //       prioritized in spite of weaker name score?
+
+  // Sort by closest matching filenames first.
+  reasons.sort((a, b) => b.nameScore - a.nameScore)
+
+  // Filter only the best name matches.
+  const bestNameScore = reasons[0].nameScore
+  const bestName = reasons.filter(({ nameScore }) => nameScore === bestNameScore)
+
+  // Then choose the best matching path.
+  const sharePath = bestName.filter(({ pathScore }) => pathScore >= 0)
+  const mostResembles = (sharePath.length
+    ? sharePath.reduce((a, b) => a.pathScore < b.pathScore ? a : b)
+    : reasons[0])
+
+  return mostResembles.item
+}
+
 module.exports = {
   parentSymbol,
   updatePlaylistFormat, updateGroupFormat, updateTrackFormat,
   cloneGrouplike,
   filterTracks,
-  flattenGrouplike, countTotalTracks,
+  flattenGrouplike,
+  getFlatTrackList,
+  getFlatGroupList,
+  countTotalTracks,
   shuffleOrderOfGroups,
   reverseOrderOfGroups,
   partiallyFlattenGrouplike, collapseGrouplike,
@@ -712,11 +1000,44 @@ module.exports = {
   searchForItem,
   getCorrespondingFileForItem,
   getCorrespondingPlayableForFile,
+  getPathScore,
+  findItemObject,
   isGroup, isTrack,
   isOpenable, isPlayable
 }
 
 if (require.main === module) {
+  console.log(getPathScore(['A', 'B', 'C'], ['A', 'B', 'C']))
+  console.log(getPathScore(['A', 'B', 'C'], ['A', 'B', 'C', 'D']))
+  console.log(getPathScore(['A', 'B', 'C', 'E'], ['A', 'B', 'C']))
+  console.log(getPathScore(['W', 'X'], ['Y', 'Z']))
+  console.log(getNameScore('C418 - Vlem', 'Vlem'))
+  console.log(getNameScore('glimmer', 'glimmer'))
+  console.log(getNameScore('C418 - Vlem', 'covet - glimmer'))
+  console.log(findItemObject(
+    // {name: 'T', downloaderArg: 'foo', path: ['A', 'B', 'C']},
+    {name: 'B'},
+    // getFlatTrackList(
+    getFlatGroupList(
+      updateGroupFormat({items: [
+        {id: 1, name: 'T'},
+        {id: 2, name: 'T'},
+        {id: 3, name: 'T'},
+        // {id: 4, name: 'T', downloaderArg: 'foo'},
+        {id: 5, name: 'T'},
+        {id: 6, name: 'Y', downloaderArg: 'foo'},
+        {name: 'A', items: [
+          {name: 'B', items: [
+            {name: 'C', items: [
+              {name: 'T'}
+            ]},
+            {name: 'T'}
+          ]}
+        ]}
+      ]})
+    )
+  ))
+
   {
     const group = updateGroupFormat({items: [
       {name: '- 1.01 Hello World 425', downloaderArg: 'x'},