diff options
Diffstat (limited to 'playlist-utils.js')
-rw-r--r-- | playlist-utils.js | 341 |
1 files changed, 331 insertions, 10 deletions
diff --git a/playlist-utils.js b/playlist-utils.js index 1015748..979c6d6 100644 --- a/playlist-utils.js +++ b/playlist-utils.js @@ -166,15 +166,30 @@ function flattenGrouplike(grouplike) { // levels in the group tree and returns them as a new group containing those // tracks. - return { - items: grouplike.items.map(item => { - if (isGroup(item)) { - return flattenGrouplike(item).items - } else { - return [item] - } - }).reduce((a, b) => a.concat(b), []) - } + return {items: getFlatTrackList(grouplike)} +} + +function getFlatTrackList(grouplike) { + // Underlying function for flattenGrouplike. Can be used if you just want to + // get an array and not a grouplike, too. + + return grouplike.items.map(item => { + if (isGroup(item)) { + return getFlatTrackList(item) + } else { + return [item] + } + }).reduce((a, b) => a.concat(b), []) +} + +function getFlatGroupList(grouplike) { + // Analogue of getFlatTrackList for groups instead of tracks. Returns a flat + // array of all the groups in each level of the provided grouplike. + + return grouplike.items + .filter(isGroup) + .map(item => [item, ...getFlatGroupList(item)]) + .reduce((a, b) => a.concat(b), []) } function countTotalTracks(item) { @@ -692,12 +707,285 @@ function getCorrespondingPlayableForFile(item) { return parent.items.find(item => isPlayable(item) && path.basename(item.url, path.extname(item.url)) === basename) } +function getPathScore(path1, path2) { + // This function is basically only used in findTrackObject, but it's kinda + // huge and I need to test that it works outside of that context, so I'm + // sticking it on the global scope. Feel free to steal for whatever your + // weird future need for comparing any two paths is! + // + // path1 and path2 should be arrays of group names, according to the path + // you'd follow to open the groups and access a contained track. They should + // *not* include the track name, unless you want those to be considered a + // valid place for the paths to cross over! + // + // -- + // + // A path score is determined to be the number of groups which must be + // traversed across the two paths to find a matching group name and then + // reach the other track under that group. A lower score implies a closer + // match (since score increases not with "closeness" but "separation"). + // + // For example, these two paths are considered to have a score of zero + // against each other ("T" represents the track): + // + // X/B/C/T + // Y/B/C/T + // + // Their separation is zero because, starting from the closest (i.e. top) + // group to either the provided track or the reference data track, it takes + // zero additional steps to reach a group whose name is shared between the + // two paths: those top groups already have the same name. + // + // The above example indicates that the pattern before the closest matching + // path does not matter. Indeed, the actual length of the path could be + // different (W/X/B/C versus Y/B/C for example), and the score would still + // be the same. Parts of the path prepending the closest matching group + // name are thus ommitted from following examples. + // + // These paths, on the other hand, have a score of one: + // + // (...)/C/T + // (...)/C/D/T + // + // The closest matching name in this path is C. It is zero steps further + // from the start of the first path (C is the start); on the other path, + // it is one step further (D must be passed first). Therefore, the total + // steps that must be travelled to reach the start of one path to the + // start of the other by passing through the closest overlapping name is + // one: 0 + 1 = 1. + // + // In determining which of two paths are a closer match to a provided + // reference path, it's important to remember that a lower score (implying + // less separation) is better. Though we'll see the following example is + // probably more common across most music libraries, a reasonably natural + // example of the path structures above occurring in a music library could + // be this: an artist directory containing both albums and stray tracks, + // where one track apparently appears as both a stray track file and in an + // adjacent album directory; or, a mixtape which contains adjacent to its + // mixed-segment track listing a folder of the unmixed segments. + // + // These paths have a score of two: + // + // (...)/B/C/T + // (...)/B/D/T + // + // With the above examples, this one is fairly self explanatory. In this + // case, the closest matching group, B, is one step away from the start + // point (the first group before the track, i.e, the top name in the path) + // in both paths. Summed, the distance (and thus the score) is two. + // + // This example demonstrates what is probably a more realistic case of two + // tracks resembling each other (e.g. having the same name or source) but + // not sharing the same path: if B represents an artist, and C & D stand in + // place (in this example) of the names of that artist's albums, then it is + // reasonable to say the directories for the album are slightly different + // across the two paths. This could be the case for two users who ended up + // naming the album directory differently, or for one user restoring from + // their own backend/playlist after having adjusted the naming structure of + // their music library. It's also possible that there could simply be two + // albums by the same artist which contain a track of the same name; in + // that case, the path score implementation is doing exactly its job by + // indicating that these tracks would have a greater score (meaning further + // separation) than when checking against the track belonging to the same + // release. (If there is concern that such a track should not match at all + // because it may be a remarkably different track, other factors of + // resemblance -- position in album, duration, etc -- can be used to add + // detail to the apparent level of resemblance then.) + // + // -- + // + // A note on determining which name is the "closest" -- consider + // the following two paths: + // + // A/X/B/C/D/E/T + // A/Y/E/B/C/D/T + // + // There are many names which appear in both paths. So which do we treat + // as the closest? Well, what we're looking for is the shortest path across + // both paths, passing through at a particular name. To do this, we simply + // calculate the score for each name in the intersection of both paths + // (i.e. every name which shows up in both paths) using the same algorithm + // described above (sum of the distance from the start of either path). + // Then we take the lowest resultant score, and use that as the final score + // which is returned out of this function. + // + // TODO: There are probably optimizations to be made as far as avoiding + // processing every overlapping name goes (particularly once it's + // determined that no other path could be determined), but honestly + // I'm pretty sure if I tried to write an algorithm taking *that* + // into account, I'd end up screwing it up. :P So for now, we just + // do a simple filter and reduce operation. + // + // If the intersection of the two paths is empty (i.e. there is no overlap), + // we return the otherwise nonsense value, -1. + + const union = Array.from(new Set([...path1, ...path2])) + const intersection = union.filter( + name => path1.includes(name) && path2.includes(name)) + + if (!intersection.length) { + return -1 + } + + const reversed1 = path1.reverse() + const reversed2 = path2.reverse() + + const scores = intersection.map( + name => reversed1.indexOf(name) + reversed2.indexOf(name)) + + return scores.reduce((a, b) => a < b ? a : b) +} + +function getNameScore(name1, name2) { + // Pretty simple algorithm here: we're looking for the longest continuous + // series of words which is shared between both names. The score is the + // length of that series, so a higher score is better (and a zero score + // means no overlap). + + // TODO: + // This ain't perfect! Case example: User A has library structure: + // + // Very Cool Album/ + // 01 Beans + // + // User B has library structure: + // + // Very Cool Album/ + // Very Cool Album- 01 Beans + // + // Now if user B queues 'Very Cool Album- 01 Beans', the search will match + // the *group* 'Very Cool Album' on User A's end, because that name has a + // 3-word match, in comparison to the track '01 Beans', which is only a + // 2-word match. Not sure what a proper solution here would be, but probably + // it'd involve somehow prioritizing series of words which match closer to + // the end! + + // Split into chunks of word characters, taking out any non-word (\W) + // characters between. + const toWords = name => name.split(/\W+/) + + const words1 = toWords(name1) + const words2 = toWords(name2) + + const getLongestMatch = (parse, against) => { + let longestMatch = 0 + + for (let i = 0; i < parse.length; i++) { + const word = parse[i] + + for (let j = 0; j < against.length; j++) { + if (against[j] !== word) { + continue + } + + let offset = 1 + while ( + parse[i + offset] && + against[i + offset] && + parse[i + offset] === against[j + offset] + ) { + offset++ + } + + if (offset > longestMatch) { + longestMatch = offset + } + } + } + + return longestMatch + } + + return Math.max( + getLongestMatch(words1, words2), + getLongestMatch(words2, words1) + ) +} + +function findItemObject(referenceData, possibleChoices) { + // Finds the item object in the provided choices which most closely resembles + // the provided reference data. This is used for maintaining the identity of + // item objects when reloading a playlist (see serialized-backend.js). It's + // also usable in synchronizing the identity of items across linked clients + // (see socket.js). + + // Reference data includes item NAME and item PATH (names of parent groups). + // Specifics of how existing item objects are determined to resemble this + // data are laid out next to the relevant implementation code. + // + // TODO: Should track number be considered here? + // TODO: Should track "metadata" (duration, md5?) be considered too? + // This in particular prompts questions of what the purpose of matching + // tracks *is*, and in considering those I lean towards "no" here, but + // it's probably worth looking at more in the future. (TM.) + + function getItemPathScore(item) { + if (!referenceData.path) { + return null + } + + const path1 = referenceData.path.slice() + const path2 = getItemPath(item).slice(0, -1).map(group => group.name) + return getPathScore(path1, path2) + } + + function getItemNameScore(item) { + const name1 = referenceData.name + const name2 = item.name + return getNameScore(name1, name2) + } + + // The only items which will be considered at all are those which at least + // partially match the reference name. + const baselineResemble = possibleChoices.map(item => ({ + item, + nameScore: getItemNameScore(item) + })).filter(item => item.nameScore > 0) + + // If no item matches the baseline conditions for resemblance at all, + // return null. It's up to the caller to decide what to do in this case, + // e.g. reporting that no item was found, or creating a new item object + // from the reference data altogether. + if (!baselineResemble.length) { + return null + } + + // Find the "reasons" these items resemble the reference data; these will + // be used as the factors in calculating which item resembles closest. + const reasons = baselineResemble.map(({item, nameScore}) => ({ + item, + pathScore: getItemPathScore(item), + nameScore + })) + + // TODO: Are there circumstances in which a strong path score should be + // prioritized in spite of weaker name score? + + // Sort by closest matching filenames first. + reasons.sort((a, b) => b.nameScore - a.nameScore) + + // Filter only the best name matches. + const bestNameScore = reasons[0].nameScore + const bestName = reasons.filter(({ nameScore }) => nameScore === bestNameScore) + + // Then choose the best matching path. + const sharePath = bestName.filter(({ pathScore }) => pathScore >= 0) + const mostResembles = (sharePath.length + ? sharePath.reduce((a, b) => a.pathScore < b.pathScore ? a : b) + : reasons[0]) + + return mostResembles.item +} + module.exports = { parentSymbol, updatePlaylistFormat, updateGroupFormat, updateTrackFormat, cloneGrouplike, filterTracks, - flattenGrouplike, countTotalTracks, + flattenGrouplike, + getFlatTrackList, + getFlatGroupList, + countTotalTracks, shuffleOrderOfGroups, reverseOrderOfGroups, partiallyFlattenGrouplike, collapseGrouplike, @@ -712,11 +1000,44 @@ module.exports = { searchForItem, getCorrespondingFileForItem, getCorrespondingPlayableForFile, + getPathScore, + findItemObject, isGroup, isTrack, isOpenable, isPlayable } if (require.main === module) { + console.log(getPathScore(['A', 'B', 'C'], ['A', 'B', 'C'])) + console.log(getPathScore(['A', 'B', 'C'], ['A', 'B', 'C', 'D'])) + console.log(getPathScore(['A', 'B', 'C', 'E'], ['A', 'B', 'C'])) + console.log(getPathScore(['W', 'X'], ['Y', 'Z'])) + console.log(getNameScore('C418 - Vlem', 'Vlem')) + console.log(getNameScore('glimmer', 'glimmer')) + console.log(getNameScore('C418 - Vlem', 'covet - glimmer')) + console.log(findItemObject( + // {name: 'T', downloaderArg: 'foo', path: ['A', 'B', 'C']}, + {name: 'B'}, + // getFlatTrackList( + getFlatGroupList( + updateGroupFormat({items: [ + {id: 1, name: 'T'}, + {id: 2, name: 'T'}, + {id: 3, name: 'T'}, + // {id: 4, name: 'T', downloaderArg: 'foo'}, + {id: 5, name: 'T'}, + {id: 6, name: 'Y', downloaderArg: 'foo'}, + {name: 'A', items: [ + {name: 'B', items: [ + {name: 'C', items: [ + {name: 'T'} + ]}, + {name: 'T'} + ]} + ]} + ]}) + ) + )) + { const group = updateGroupFormat({items: [ {name: '- 1.01 Hello World 425', downloaderArg: 'x'}, |