From 3da4cb333ca2d394a625a869b774a4e8de28e1c7 Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Tue, 16 Mar 2021 22:38:38 -0300 Subject: check partial name matches; don't check DL args not thoroughly tested yet! lol --- playlist-utils.js | 124 ++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 84 insertions(+), 40 deletions(-) diff --git a/playlist-utils.js b/playlist-utils.js index 5fbfff8..317ff84 100644 --- a/playlist-utils.js +++ b/playlist-utils.js @@ -831,6 +831,54 @@ function getPathScore(path1, path2) { return scores.reduce((a, b) => a < b ? a : b) } +function getNameScore(name1, name2) { + // Pretty simple algorithm here: we're looking for the longest continuous + // series of words which is shared between both names. The score is the + // length of that series, so a higher score is better (and a zero score + // means no overlap). + + // Split into chunks of word characters, taking out any non-word (\W) + // characters between. + const toWords = name => name.split(/\W+/) + + const words1 = toWords(name1) + const words2 = toWords(name2) + + const getLongestMatch = (parse, against) => { + let longestMatch = 0 + + for (let i = 0; i < parse.length; i++) { + const word = parse[i] + + for (let j = 0; j < against.length; j++) { + if (against[j] !== word) { + continue + } + + let offset = 1 + while ( + parse[i + offset] && + against[i + offset] && + parse[i + offset] === against[j + offset] + ) { + offset++ + } + + if (offset > longestMatch) { + longestMatch = offset + } + } + } + + return longestMatch + } + + return Math.max( + getLongestMatch(words1, words2), + getLongestMatch(words2, words1) + ) +} + function findItemObject(referenceData, possibleChoices) { // Finds the item object in the provided choices which most closely resembles // the provided reference data. This is used for maintaining the identity of @@ -838,10 +886,9 @@ function findItemObject(referenceData, possibleChoices) { // also usable in synchronizing the identity of items across linked clients // (see socket.js). - // Reference data includes item NAME, item SOURCE (downloaderArg), and item - // PATH (names of parent groups). Specifics of how existing item objects are - // determined to resemble this data are laid out next to the relevant - // implementation code. + // Reference data includes item NAME and item PATH (names of parent groups). + // Specifics of how existing item objects are determined to resemble this + // data are laid out next to the relevant implementation code. // // TODO: Should track number be considered here? // TODO: Should track "metadata" (duration, md5?) be considered too? @@ -859,11 +906,18 @@ function findItemObject(referenceData, possibleChoices) { return getPathScore(path1, path2) } - // The only items which will be considered at all are those which match at - // least one of the reference name/source. - const baselineResemble = possibleChoices.filter(item => - item.name === referenceData.name || - item.downloaderArg && item.downloaderArg === referenceData.downloaderArg) + function getItemNameScore(item) { + const name1 = referenceData.name + const name2 = item.name + return getNameScore(name1, name2) + } + + // The only items which will be considered at all are those which at least + // partially match the reference name. + const baselineResemble = possibleChoices.map(item => ({ + item, + nameScore: getItemNameScore(item) + })).filter(item => item.nameScore > 0) // If no item matches the baseline conditions for resemblance at all, // return null. It's up to the caller to decide what to do in this case, @@ -875,40 +929,27 @@ function findItemObject(referenceData, possibleChoices) { // Find the "reasons" these items resemble the reference data; these will // be used as the factors in calculating which item resembles closest. - const reasons = baselineResemble.map(item => ({ + const reasons = baselineResemble.map(({item, nameScore}) => ({ item, - nameMatches: item.name === referenceData.name, - sourceMatches: item.downloaderArg && item.downloaderArg === referenceData.downloaderArg, - pathScore: getItemPathScore(item) + pathScore: getItemPathScore(item), + nameScore })) - // TODO: The algorithm for determining which track matches closest is - // rudimentary at best right now. It would be well improved with - // better-detailed reasoning! That said, here's the current code - // laid out explicitly: - // - // Initial sort by matches is (NAME & SOURCE), SOURCE, NAME. Track which - // most closely matches path is returned thereafter, with ties broken by - // the initial sort. (If name, source, *and* path all are equal, first - // track as ordered in the source playlist/parent group is returned.) - // If no tracks have any path overlap, the first item in the sorted list - // is returned (since it is the closest match). (Again, ties here are - // broken according to source ordering.) - - reasons.sort((a, b) => - a.sourceMatches && !b.sourceMatches ? -1 : - !a.sourceMatches && b.sourceMatches ? 1 : - a.nameMatches && !b.nameMatches ? -1 : - !a.nameMatches && b.nameMatches ? 1 : - 0) - - let mostResembles - const sharePath = reasons.filter(({ pathScore }) => pathScore >= 0) - if (sharePath.length) { - mostResembles = sharePath.reduce((a, b) => a.pathScore < b.pathScore ? a : b) - } else { - mostResembles = reasons[0] - } + // TODO: Are there circumstances in which a strong path score should be + // prioritized in spite of weaker name score? + + // Sort by closest matching filenames first. + reasons.sort((a, b) => b.nameScore - a.nameScore) + + // Filter only the best name matches. + const bestNameScore = reasons[0].nameScore + const bestName = reasons.filter(({ nameScore }) => nameScore === bestNameScore) + + // Then choose the best matching path. + const sharePath = bestName.filter(({ pathScore }) => pathScore >= 0) + const mostResembles = (sharePath.length + ? sharePath.reduce((a, b) => a.pathScore < b.pathScore ? a : b) + : reasons[0]) return mostResembles.item } @@ -947,6 +988,9 @@ if (require.main === module) { console.log(getPathScore(['A', 'B', 'C'], ['A', 'B', 'C', 'D'])) console.log(getPathScore(['A', 'B', 'C', 'E'], ['A', 'B', 'C'])) console.log(getPathScore(['W', 'X'], ['Y', 'Z'])) + console.log(getNameScore('C418 - Vlem', 'Vlem')) + console.log(getNameScore('glimmer', 'glimmer')) + console.log(getNameScore('C418 - Vlem', 'covet - glimmer')) console.log(findItemObject( // {name: 'T', downloaderArg: 'foo', path: ['A', 'B', 'C']}, {name: 'B'}, -- cgit 1.3.0-6-gf8a5