check partial name matches; don't check DL args

not thoroughly tested yet! lol
author: (quasar) nebula <towerofnix@gmail.com> 2021-03-16 22:38:38 -0300
committer: (quasar) nebula <qznebula@protonmail.com> 2024-05-16 19:17:00 -0300
commit: 479cf8c7db13f6ed3cdfed5078ee5d1077dfa104 (patch)
tree: 0e1facf4d4d8fddc71ca251890b3b2d6395a153e
parent: 811387200fc4e9c0d368e9f23a701c55697b54ef (diff)
1 files changed, 84 insertions, 40 deletions
diff --git a/playlist-utils.js b/playlist-utils.js
index 7c742ae..041b530 100644
--- a/playlist-utils.js
+++ b/playlist-utils.js
@@ -862,6 +862,54 @@ export function getPathScore(path1, path2) {
   return scores.reduce((a, b) => a < b ? a : b)
 }
 
+export function getNameScore(name1, name2) {
+  // Pretty simple algorithm here: we're looking for the longest continuous
+  // series of words which is shared between both names. The score is the
+  // length of that series, so a higher score is better (and a zero score
+  // means no overlap).
+
+  // Split into chunks of word characters, taking out any non-word (\W)
+  // characters between.
+  const toWords = name => name.split(/\W+/)
+
+  const words1 = toWords(name1)
+  const words2 = toWords(name2)
+
+  const getLongestMatch = (parse, against) => {
+    let longestMatch = 0
+
+    for (let i = 0; i < parse.length; i++) {
+      const word = parse[i]
+
+      for (let j = 0; j < against.length; j++) {
+        if (against[j] !== word) {
+          continue
+        }
+
+        let offset = 1
+        while (
+          parse[i + offset] &&
+          against[i + offset] &&
+          parse[i + offset] === against[j + offset]
+        ) {
+          offset++
+        }
+
+        if (offset > longestMatch) {
+          longestMatch = offset
+        }
+      }
+    }
+
+    return longestMatch
+  }
+
+  return Math.max(
+    getLongestMatch(words1, words2),
+    getLongestMatch(words2, words1)
+  )
+}
+
 export function findItemObject(referenceData, possibleChoices) {
   // Finds the item object in the provided choices which most closely resembles
   // the provided reference data. This is used for maintaining the identity of
@@ -869,10 +917,9 @@ export function findItemObject(referenceData, possibleChoices) {
   // also usable in synchronizing the identity of items across linked clients
   // (see socket.js).
 
-  // Reference data includes item NAME, item SOURCE (downloaderArg), and item
-  // PATH (names of parent groups). Specifics of how existing item  objects are
-  // determined to resemble this data are laid out next to the relevant
-  // implementation code.
+  // Reference data includes item NAME and item PATH (names of parent groups).
+  // Specifics of how existing item objects are determined to resemble this
+  // data are laid out next to the relevant implementation code.
   //
   // TODO: Should track number be considered here?
   // TODO: Should track "metadata" (duration, md5?) be considered too?
@@ -890,11 +937,18 @@ export function findItemObject(referenceData, possibleChoices) {
     return getPathScore(path1, path2)
   }
 
-  // The only items which will be considered at all are those which match at
-  // least one of the reference name/source.
-  const baselineResemble = possibleChoices.filter(item =>
-    item.name === referenceData.name ||
-    item.downloaderArg && item.downloaderArg === referenceData.downloaderArg)
+  function getItemNameScore(item) {
+    const name1 = referenceData.name
+    const name2 = item.name
+    return getNameScore(name1, name2)
+  }
+
+  // The only items which will be considered at all are those which at least
+  // partially match the reference name.
+  const baselineResemble = possibleChoices.map(item => ({
+    item,
+    nameScore: getItemNameScore(item)
+  })).filter(item => item.nameScore > 0)
 
   // If no item matches the baseline conditions for resemblance at all,
   // return null. It's up to the caller to decide what to do in this case,
@@ -906,40 +960,27 @@ export function findItemObject(referenceData, possibleChoices) {
 
   // Find the "reasons" these items resemble the reference data; these will
   // be used as the factors in calculating which item resembles closest.
-  const reasons = baselineResemble.map(item => ({
+  const reasons = baselineResemble.map(({item, nameScore}) => ({
     item,
-    nameMatches: item.name === referenceData.name,
-    sourceMatches: item.downloaderArg && item.downloaderArg === referenceData.downloaderArg,
-    pathScore: getItemPathScore(item)
+    pathScore: getItemPathScore(item),
+    nameScore
   }))
 
-  // TODO: The algorithm for determining which track matches closest is
-  //       rudimentary at best right now. It would be well improved with
-  //       better-detailed reasoning! That said, here's the current code
-  //       laid out explicitly:
-  //
-  // Initial sort by matches is (NAME & SOURCE), SOURCE, NAME. Track which
-  // most closely matches path is returned thereafter, with ties broken by
-  // the initial sort. (If name, source, *and* path all are equal, first
-  // track as ordered in the source playlist/parent group is returned.)
-  // If no tracks have any path overlap, the first item in the sorted list
-  // is returned (since it is the closest match). (Again, ties here are
-  // broken according to source ordering.)
-
-  reasons.sort((a, b) =>
-    a.sourceMatches && !b.sourceMatches ? -1 :
-    !a.sourceMatches && b.sourceMatches ? 1 :
-    a.nameMatches && !b.nameMatches ? -1 :
-    !a.nameMatches && b.nameMatches ? 1 :
-    0)
-
-  let mostResembles
-  const sharePath = reasons.filter(({ pathScore }) => pathScore >= 0)
-  if (sharePath.length) {
-    mostResembles = sharePath.reduce((a, b) => a.pathScore < b.pathScore ? a : b)
-  } else {
-    mostResembles = reasons[0]
-  }
+  // TODO: Are there circumstances in which a strong path score should be
+  //       prioritized in spite of weaker name score?
+
+  // Sort by closest matching filenames first.
+  reasons.sort((a, b) => b.nameScore - a.nameScore)
+
+  // Filter only the best name matches.
+  const bestNameScore = reasons[0].nameScore
+  const bestName = reasons.filter(({ nameScore }) => nameScore === bestNameScore)
+
+  // Then choose the best matching path.
+  const sharePath = bestName.filter(({ pathScore }) => pathScore >= 0)
+  const mostResembles = (sharePath.length
+    ? sharePath.reduce((a, b) => a.pathScore < b.pathScore ? a : b)
+    : reasons[0])
 
   return mostResembles.item
 }
@@ -949,6 +990,9 @@ console.log(getPathScore(['A', 'B', 'C'], ['A', 'B', 'C']))
 console.log(getPathScore(['A', 'B', 'C'], ['A', 'B', 'C', 'D']))
 console.log(getPathScore(['A', 'B', 'C', 'E'], ['A', 'B', 'C']))
 console.log(getPathScore(['W', 'X'], ['Y', 'Z']))
+console.log(getNameScore('C418 - Vlem', 'Vlem'))
+console.log(getNameScore('glimmer', 'glimmer'))
+console.log(getNameScore('C418 - Vlem', 'covet - glimmer'))
 console.log(findItemObject(
   // {name: 'T', downloaderArg: 'foo', path: ['A', 'B', 'C']},
   {name: 'B'},
author	(quasar) nebula <towerofnix@gmail.com>	2021-03-16 22:38:38 -0300
committer	(quasar) nebula <qznebula@protonmail.com>	2024-05-16 19:17:00 -0300
commit	479cf8c7db13f6ed3cdfed5078ee5d1077dfa104 (patch)
tree	0e1facf4d4d8fddc71ca251890b3b2d6395a153e
parent	811387200fc4e9c0d368e9f23a701c55697b54ef (diff)