From 6055638558a345904b41467839191a7143862d25 Mon Sep 17 00:00:00 2001
From: Florrie
Date: Mon, 4 Jun 2018 21:27:18 -0300
Subject: Smart playlists

Basically directly pulled from http-music. Want to make a nice UI for this
eventually ("opening playlist..." popup dialog), but not for now.
---
 crawlers.js       | 292 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 general-util.js   |  17 ++++
 index.js          |   6 +-
 package-lock.json | 173 ++++++++++++++++++++++++++++++++++++
 package.json      |   2 +
 smart-playlist.js | 131 +++++++++++++++++++++++++++
 todo.txt          |   2 +
 7 files changed, 622 insertions(+), 1 deletion(-)
 create mode 100644 crawlers.js
 create mode 100644 smart-playlist.js

diff --git a/crawlers.js b/crawlers.js
new file mode 100644
index 0000000..0bf5c4e
--- /dev/null
+++ b/crawlers.js
@@ -0,0 +1,292 @@
+const fs = require('fs')
+const path = require('path')
+const naturalSort = require('node-natural-sort')
+const fetch = require('node-fetch')
+const cheerio = require('cheerio')
+const url = require('url')
+const { downloadPlaylistFromOptionValue, promisifyProcess } = require('./general-util')
+const { spawn } = require('child_process')
+
+const { promisify } = require('util')
+const readDir = promisify(fs.readdir)
+const stat = promisify(fs.stat)
+
+function sortIgnoreCase(sortFunction) {
+  // Wraps a string comparison function so that both of its arguments are
+  // lower-cased before they are compared.
+
+  return function(a, b) {
+    return sortFunction(a.toLowerCase(), b.toLowerCase())
+  }
+}
+
+function crawlHTTP(absURL, opts = {}, internals = {}) {
+  // Recursively crawls a given URL, following every link to a deeper path and
+  // recording all links in a tree (in the same format playlists use). Makes
+  // multiple attempts to download failed paths.
+  //
+  // Resolves to an object of the form {items: [...]}; a path which could not
+  // be downloaded at all resolves to {items: []} rather than rejecting.
+
+  const {
+    verbose = false,
+
+    maxAttempts = 5,
+
+    keepSeparateHosts = false,
+    stayInSameDirectory = true,
+
+    keepAnyFileType = false,
+    fileTypes = ['wav', 'ogg', 'oga', 'mp3', 'mp4', 'm4a', 'mov', 'mpga', 'mod'],
+
+    filterRegex = null
+  } = opts
+
+  if (!internals.attempts) internals.attempts = 0
+
+  // TODO: Should absURL initially be added into this array? I'd like to
+  // re-program this entire crawl function to make more sense - "internal"
+  // dictionaries aren't quite easy to reason about!
+  if (!internals.allURLs) internals.allURLs = []
+
+  const verboseLog = text => {
+    if (verbose) {
+      console.error(text)
+    }
+  }
+
+  const absURLObj = new url.URL(absURL)
+
+  return fetch(absURL)
+    .then(
+      res => res.text().then(async text => {
+        const links = getHTMLLinks(text)
+
+        const items = []
+
+        for (const link of links) {
+          let [ name, href ] = link
+
+          // Anchors without an href (e.g. <a name="..">) have nothing to
+          // follow, and would otherwise make the string checks below throw.
+          if (typeof href !== 'string') {
+            verboseLog("[Ignored] Link without href: " + name)
+            continue
+          }
+
+          // If the name (that's the content inside of <a>..</a>) ends with a
+          // slash, that's probably just an artifact of a directory lister;
+          // not actually part of the intended content. So we remove it!
+          if (name.endsWith('/')) {
+            name = name.slice(0, -1)
+          }
+
+          name = name.trim()
+
+          const urlObj = new url.URL(href, absURL + '/')
+          const linkURL = url.format(urlObj)
+
+          if (internals.allURLs.includes(linkURL)) {
+            verboseLog("[Ignored] Already done this URL: " + linkURL)
+            continue
+          }
+
+          internals.allURLs.push(linkURL)
+
+          if (filterRegex && !(filterRegex.test(linkURL))) {
+            verboseLog("[Ignored] Failed regex: " + linkURL)
+            continue
+          }
+
+          if (!keepSeparateHosts && urlObj.host !== absURLObj.host) {
+            verboseLog("[Ignored] Inconsistent host: " + linkURL)
+            continue
+          }
+
+          if (stayInSameDirectory) {
+            const relative = path.relative(absURLObj.pathname, urlObj.pathname)
+            if (relative.startsWith('..') || path.isAbsolute(relative)) {
+              verboseLog("[Ignored] Outside of parent directory: " + linkURL)
+              continue
+            }
+          }
+
+          if (href.endsWith('/')) {
+            // It's a directory!
+
+            verboseLog("[Dir] " + linkURL)
+
+            items.push(await (
+              crawlHTTP(linkURL, opts, Object.assign({}, internals))
+                .then(({ items }) => ({name, items}))
+            ))
+          } else {
+            // It's a file!
+
+            const extensions = fileTypes.map(t => '.' + t)
+
+            if (
+              !keepAnyFileType &&
+              !(extensions.includes(path.extname(href)))
+            ) {
+              verboseLog("[Ignored] Bad extension: " + linkURL)
+              continue
+            }
+
+            verboseLog("[File] " + linkURL)
+            items.push({name, downloaderArg: linkURL})
+          }
+        }
+
+        return {items}
+      }),
+
+      err => {
+        console.warn("Failed to download: " + absURL)
+
+        if (internals.attempts < maxAttempts) {
+          console.warn(
+            `Trying again. Attempt ${internals.attempts + 1}/${maxAttempts}...`
+          )
+
+          return crawlHTTP(absURL, opts, Object.assign({}, internals, {
+            attempts: internals.attempts + 1
+          }))
+        } else {
+          console.error(
+            "We've hit the download attempt limit (" + maxAttempts + "). " +
+            "Giving up on this path."
+          )
+
+          throw 'FAILED_DOWNLOAD'
+        }
+      }
+    )
+    .catch(error => {
+      if (error === 'FAILED_DOWNLOAD') {
+        // Debug logging for this is already handled above. Resolve to an
+        // empty group, since callers destructure "items" from the result.
+        return {items: []}
+      } else {
+        throw error
+      }
+    })
+}
+
+function getHTMLLinks(text) {
+  // Returns a [text content, href attribute] pair for every <a> element in
+  // the given HTML source.
+  //
+  // Never parse HTML with a regex!
+  const $ = cheerio.load(text)
+
+  return $('a').get().map(el => {
+    const $el = $(el)
+    return [$el.text(), $el.attr('href')]
+  })
+}
+
+
+
+function crawlLocal(dirPath, extensions = [
+  'ogg', 'oga',
+  'wav', 'mp3', 'mp4', 'm4a', 'aac',
+  'mod'
+]) {
+  // Recursively crawls a local directory, resolving to a group ({items: [..]})
+  // of tracks for files whose extension is listed in "extensions" and of
+  // nested groups (named after their directory) for subdirectories.
+
+  return readDir(dirPath).then(items => {
+    items.sort(sortIgnoreCase(naturalSort()))
+
+    return Promise.all(items.map(item => {
+      const itemPath = path.join(dirPath, item)
+
+      return stat(itemPath).then(stats => {
+        if (stats.isDirectory()) {
+          return crawlLocal(itemPath, extensions)
+            .then(group => Object.assign({name: item}, group))
+        } else if (stats.isFile()) {
+          // Extname returns a string starting with a dot; we don't want the
+          // dot, so we slice it off of the front.
+          const ext = path.extname(item).slice(1)
+
+          if (extensions.includes(ext)) {
+            // The name of the track doesn't include the file extension; a user
+            // probably wouldn't add the file extensions to a hand-written
+            // playlist, or want them in an auto-generated one.
+            const basename = path.basename(item, path.extname(item))
+
+            const track = {name: basename, downloaderArg: itemPath}
+            return track
+          } else {
+            return null
+          }
+        }
+      })
+    }))
+  }).then(items => items.filter(Boolean))
+    .then(filteredItems => ({items: filteredItems}))
+}
+
+async function crawlYouTube(url) {
+  // Lists the entries of a YouTube playlist by running youtube-dl, whose
+  // output is parsed as one JSON object per line.
+
+  const ytdl = spawn('youtube-dl', [
+    '-j', // Output as JSON
+    '--flat-playlist',
+    url
+  ])
+
+  // Stdout arrives in arbitrarily sized chunks, and a chunk can end partway
+  // through a line. So we collect all of the output and only parse it once
+  // the process has finished.
+  const chunks = []
+
+  ytdl.stdout.on('data', data => {
+    chunks.push(data)
+  })
+
+  // Pass false so it doesn't show logging.
+  await promisifyProcess(ytdl, false)
+
+  const items = Buffer.concat(chunks).toString()
+    .split('\n')
+    .filter(line => line.trim().length > 0)
+    .map(line => JSON.parse(line))
+
+  return {
+    items: items.map(item => {
+      return {
+        name: item.title,
+        downloaderArg: 'https://youtube.com/watch?v=' + item.id
+      }
+    })
+  }
+}
+
+async function openFile(input) {
+  // Loads a playlist from a URL or local path and parses it as JSON.
+  return JSON.parse(await downloadPlaylistFromOptionValue(input))
+}
+
+module.exports = {
+  crawlHTTP,
+  crawlLocal,
+  crawlYouTube,
+  openFile,
+
+  getCrawlerByName: function(name) {
+    // Maps the crawler names usable in a playlist's "source" property to
+    // crawler functions; returns null for unknown names.
+    switch (name) {
+      case 'crawl-http': return crawlHTTP
+      case 'crawl-local': return crawlLocal
+      case 'crawl-youtube': return crawlYouTube
+      case 'open-file': return openFile
+      default: return null
+    }
+  }
+}
diff --git a/general-util.js b/general-util.js
index 35e1103..879219d 100644
--- a/general-util.js
+++ b/general-util.js
@@ -46,3 +46,20 @@ module.exports.killProcess = async function(proc) {
     proc.kill()
   }
 }
+
+function downloadPlaylistFromURL(url) {
+  return fetch(url).then(res => res.text())
+}
+
+function downloadPlaylistFromLocalPath(path) {
+  return readFile(path).then(buf => buf.toString())
+}
+
+module.exports.downloadPlaylistFromOptionValue = function(arg) {
+  // TODO: Verify things!
+ if (arg.startsWith('http://') || arg.startsWith('https://')) { + return downloadPlaylistFromURL(arg) + } else { + return downloadPlaylistFromLocalPath(arg) + } +} diff --git a/index.js b/index.js index df60207..72c2383 100644 --- a/index.js +++ b/index.js @@ -2,6 +2,7 @@ const { AppElement } = require('./ui') const { updatePlaylistFormat } = require('./playlist-utils') +const processSmartPlaylist = require('./smart-playlist') const ansi = require('./tui-lib/util/ansi') const CommandLineInterfacer = require('./tui-lib/util/CommandLineInterfacer') const EventEmitter = require('events') @@ -65,10 +66,13 @@ async function main() { } if (process.argv[2]) { + flushable.write(ansi.moveCursor(0, 0)) + flushable.write('Opening playlist...') + flushable.flush() grouplike = require(process.argv[2]) } - grouplike = updatePlaylistFormat(grouplike) + grouplike = await processSmartPlaylist(grouplike) appElement.grouplikeListingElement.loadGrouplike(grouplike) diff --git a/package-lock.json b/package-lock.json index e9fd90f..c0a7cfe 100644 --- a/package-lock.json +++ b/package-lock.json @@ -4,16 +4,103 @@ "lockfileVersion": 1, "requires": true, "dependencies": { + "@types/node": { + "version": "10.3.1", + "resolved": "https://registry.npmjs.org/@types/node/-/node-10.3.1.tgz", + "integrity": "sha512-IsX9aDHDzJohkm3VCDB8tkzl5RQ34E/PFA29TQk6uDGb7Oc869ZBtmdKVDBzY3+h9GnXB8ssrRXEPVZrlIOPOw==" + }, + "boolbase": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/boolbase/-/boolbase-1.0.0.tgz", + "integrity": "sha1-aN/1++YMUes3cl6p4+0xDcwed24=" + }, + "cheerio": { + "version": "1.0.0-rc.2", + "resolved": "https://registry.npmjs.org/cheerio/-/cheerio-1.0.0-rc.2.tgz", + "integrity": "sha1-S59TqBsn5NXawxwP/Qz6A8xoMNs=", + "requires": { + "css-select": "~1.2.0", + "dom-serializer": "~0.1.0", + "entities": "~1.1.1", + "htmlparser2": "^3.9.1", + "lodash": "^4.15.0", + "parse5": "^3.0.1" + } + }, "command-exists": { "version": "1.2.6", "resolved": 
"https://registry.npmjs.org/command-exists/-/command-exists-1.2.6.tgz", "integrity": "sha512-Qst/zUUNmS/z3WziPxyqjrcz09pm+2Knbs5mAZL4VAE0sSrNY1/w8+/YxeHcoBTsO6iojA6BW7eFf27Eg2MRuw==" }, + "core-util-is": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.2.tgz", + "integrity": "sha1-tf1UIgqivFq1eqtxQMlAdUUDwac=" + }, "crypto-random-string": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/crypto-random-string/-/crypto-random-string-1.0.0.tgz", "integrity": "sha1-ojD2T1aDEOFJgAmUB5DsmVRbyn4=" }, + "css-select": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/css-select/-/css-select-1.2.0.tgz", + "integrity": "sha1-KzoRBTnFNV8c2NMUYj6HCxIeyFg=", + "requires": { + "boolbase": "~1.0.0", + "css-what": "2.1", + "domutils": "1.5.1", + "nth-check": "~1.0.1" + } + }, + "css-what": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/css-what/-/css-what-2.1.0.tgz", + "integrity": "sha1-lGfQMsOM+u+58teVASUwYvh/ob0=" + }, + "dom-serializer": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-0.1.0.tgz", + "integrity": "sha1-BzxpdUbOB4DOI75KKOKT5AvDDII=", + "requires": { + "domelementtype": "~1.1.1", + "entities": "~1.1.1" + }, + "dependencies": { + "domelementtype": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-1.1.3.tgz", + "integrity": "sha1-vSh3PiZCiBrsUVRJJCmcXNgiGFs=" + } + } + }, + "domelementtype": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/domelementtype/-/domelementtype-1.3.0.tgz", + "integrity": "sha1-sXrtguirWeUt2cGbF1bg/BhyBMI=" + }, + "domhandler": { + "version": "2.4.2", + "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-2.4.2.tgz", + "integrity": "sha512-JiK04h0Ht5u/80fdLMCEmV4zkNh2BcoMFBmZ/91WtYZ8qVXSKjiw7fXMgFPnHcSZgOo3XdinHvmnDUeMf5R4wA==", + "requires": { + "domelementtype": "1" + } + }, + "domutils": { + "version": "1.5.1", + 
"resolved": "https://registry.npmjs.org/domutils/-/domutils-1.5.1.tgz", + "integrity": "sha1-3NhIiib1Y9YQeeSMn3t+Mjc2gs8=", + "requires": { + "dom-serializer": "0", + "domelementtype": "1" + } + }, + "entities": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/entities/-/entities-1.1.1.tgz", + "integrity": "sha1-blwtClYhtdra7O+AuQ7ftc13cvA=" + }, "es6-error": { "version": "3.2.0", "resolved": "https://registry.npmjs.org/es6-error/-/es6-error-3.2.0.tgz", @@ -42,11 +129,34 @@ "resolved": "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.1.11.tgz", "integrity": "sha1-Dovf5NHduIVNZOBOp8AOKgJuVlg=" }, + "htmlparser2": { + "version": "3.9.2", + "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-3.9.2.tgz", + "integrity": "sha1-G9+HrMoPP55T+k/M6w9LTLsAszg=", + "requires": { + "domelementtype": "^1.3.0", + "domhandler": "^2.3.0", + "domutils": "^1.5.1", + "entities": "^1.1.1", + "inherits": "^2.0.1", + "readable-stream": "^2.0.2" + } + }, "iac": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/iac/-/iac-1.1.0.tgz", "integrity": "sha1-C83Rc3Jy/qwj5126pFeCnYHTtMw=" }, + "inherits": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/inherits/-/inherits-2.0.3.tgz", + "integrity": "sha1-Yzwsg+PaQqUC9SRmAiSA9CCCYd4=" + }, + "isarray": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/isarray/-/isarray-1.0.0.tgz", + "integrity": "sha1-u5NdSFgsuhaMBoNJV6VKPgcSTxE=" + }, "js-base64": { "version": "2.4.5", "resolved": "https://registry.npmjs.org/js-base64/-/js-base64-2.4.5.tgz", @@ -60,6 +170,11 @@ "graceful-fs": "^4.1.6" } }, + "lodash": { + "version": "4.17.10", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.10.tgz", + "integrity": "sha512-UejweD1pDoXu+AD825lWwp4ZGtSwgnpZxb3JDViD7StjQz+Nb/6l093lx4OQ0foGWNRoc19mWy7BzL+UAK2iVg==" + }, "minimist": { "version": "0.0.8", "resolved": "https://registry.npmjs.org/minimist/-/minimist-0.0.8.tgz", @@ -78,6 +193,51 @@ "resolved": 
"https://registry.npmjs.org/node-fetch/-/node-fetch-2.1.2.tgz", "integrity": "sha1-q4hOjn5X44qUR1POxwb3iNF2i7U=" }, + "node-natural-sort": { + "version": "0.8.6", + "resolved": "https://registry.npmjs.org/node-natural-sort/-/node-natural-sort-0.8.6.tgz", + "integrity": "sha1-AdxrrcR0OxYDNAjw2FiasubAlM8=" + }, + "nth-check": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/nth-check/-/nth-check-1.0.1.tgz", + "integrity": "sha1-mSms32KPwsQQmN6rgqxYDPFJquQ=", + "requires": { + "boolbase": "~1.0.0" + } + }, + "parse5": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/parse5/-/parse5-3.0.3.tgz", + "integrity": "sha512-rgO9Zg5LLLkfJF9E6CCmXlSE4UVceloys8JrFqCcHloC3usd/kJCyPDwH2SOlzix2j3xaP9sUX3e8+kvkuleAA==", + "requires": { + "@types/node": "*" + } + }, + "process-nextick-args": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.0.tgz", + "integrity": "sha512-MtEC1TqN0EU5nephaJ4rAtThHtC86dNN9qCuEhtshvpVBkAW5ZO7BASN9REnF9eoXGcRub+pFuKEpOHE+HbEMw==" + }, + "readable-stream": { + "version": "2.3.6", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-2.3.6.tgz", + "integrity": "sha512-tQtKA9WIAhBF3+VLAseyMqZeBjW0AHJoxOtYqSUZNJxauErmLbVm2FW1y+J/YA9dUrAC39ITejlZWhVIwawkKw==", + "requires": { + "core-util-is": "~1.0.0", + "inherits": "~2.0.3", + "isarray": "~1.0.0", + "process-nextick-args": "~2.0.0", + "safe-buffer": "~5.1.1", + "string_decoder": "~1.1.1", + "util-deprecate": "~1.0.1" + } + }, + "safe-buffer": { + "version": "5.1.2", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.1.2.tgz", + "integrity": "sha512-Gd2UZBJDkXlY7GbJxfsE8/nvKkUEU1G38c1siN6QP6a9PT9MmHB8GnpscSmMJSoF8LOIrt8ud/wPtojys4G6+g==" + }, "sanitize-filename": { "version": "1.6.1", "resolved": "https://registry.npmjs.org/sanitize-filename/-/sanitize-filename-1.6.1.tgz", @@ -86,6 +246,14 @@ "truncate-utf8-bytes": "^1.0.0" } }, + "string_decoder": { + "version": "1.1.1", 
+      "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.1.1.tgz",
+      "integrity": "sha512-n/ShnvDi6FHbbVfviro+WojiFzv+s8MPMHBczVePfUpDJLwoLT0ht1l4YwBCbi8pJAveEEdnkHyPyTP/mzRfwg==",
+      "requires": {
+        "safe-buffer": "~5.1.0"
+      }
+    },
     "temp-dir": {
       "version": "1.0.0",
       "resolved": "https://registry.npmjs.org/temp-dir/-/temp-dir-1.0.0.tgz",
@@ -125,6 +293,11 @@
       "version": "1.0.4",
       "resolved": "https://registry.npmjs.org/utf8-byte-length/-/utf8-byte-length-1.0.4.tgz",
       "integrity": "sha1-9F8VDExm7uloGGUFq5P8u4rWv2E="
+    },
+    "util-deprecate": {
+      "version": "1.0.2",
+      "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz",
+      "integrity": "sha1-RQ1Nyfpw3nMnYvvS1KKJgUGaDM8="
     }
   }
 }
diff --git a/package.json b/package.json
index b26650a..e2501ab 100644
--- a/package.json
+++ b/package.json
@@ -9,6 +9,7 @@
   "author": "",
   "license": "GPL-3.0",
   "dependencies": {
+    "cheerio": "^1.0.0-rc.2",
     "command-exists": "^1.2.6",
     "fifo-js": "^2.1.0",
     "fs-extra": "^6.0.1",
@@ -16,6 +17,7 @@
     "js-base64": "^2.4.5",
     "mkdirp": "^0.5.1",
     "node-fetch": "^2.1.2",
+    "node-natural-sort": "^0.8.6",
     "sanitize-filename": "^1.6.1",
     "tempy": "^0.2.1"
   }
diff --git a/smart-playlist.js b/smart-playlist.js
new file mode 100644
index 0000000..09badd9
--- /dev/null
+++ b/smart-playlist.js
@@ -0,0 +1,131 @@
+const { getCrawlerByName } = require('./crawlers')
+const { isGroup, filterTracks, sourceSymbol, updatePlaylistFormat } = require('./playlist-utils')
+
+async function processSmartPlaylist(item, topItem = true) {
+  // Expands a "smart" playlist into a plain one. An item with a "source"
+  // property ([crawler name, ...arguments]) has the result of that crawler
+  // merged into it; otherwise, an item's child "items" are processed
+  // recursively. Afterwards, "filters" on a group are applied to its tracks.
+  //
+  // Object.assign is used so that we keep original properties, e.g. "name"
+  // or "apply". (It's also used so we return copies of original objects.)
+
+  if (topItem) item = await updatePlaylistFormat(item)
+
+  const newItem = Object.assign({}, item)
+
+  if ('source' in newItem) {
+    const [ name, ...args ] = item.source
+
+    const crawl = getCrawlerByName(name)
+
+    if (crawl) {
+      Object.assign(newItem, await crawl(...args))
+    } else {
+      console.error(`No crawler by name ${name} - skipped item:`, item)
+      newItem.failed = true
+    }
+
+    delete newItem.source
+  } else if ('items' in newItem) {
+    // Pass topItem = false, since we don't want to use updatePlaylistFormat
+    // on these items.
+    newItem.items = await Promise.all(item.items.map(x => processSmartPlaylist(x, false)))
+  }
+
+  if ('filters' in newItem) filters: {
+    if (!isGroup(newItem)) {
+      console.warn('Filter on non-group (no effect):', newItem)
+      break filters
+    }
+
+    newItem.filters = newItem.filters.filter(filter => {
+      if ('tag' in filter === false) {
+        console.warn('Filter is missing "tag" property (skipping this filter):', filter)
+        return false
+      }
+
+      return true
+    })
+
+    Object.assign(newItem, filterTracks(newItem, track => {
+      for (const filter of newItem.filters) {
+        const { tag } = filter
+
+        let value = track
+        for (const key of tag.split('.')) {
+          if (key in Object(value)) {
+            value = value[key]
+          } else {
+            console.warn(`In tag "${tag}", key "${key}" not found.`)
+            console.warn('...value until now:', value)
+            console.warn('...track:', track)
+            console.warn('...filter:', filter)
+            return false
+          }
+        }
+
+        if ('gt' in filter && value <= filter.gt) return false
+        if ('lt' in filter && value >= filter.lt) return false
+        if ('gte' in filter && value < filter.gte) return false
+        if ('lte' in filter && value > filter.lte) return false
+        if ('least' in filter && value < filter.least) return false
+        if ('most' in filter && value > filter.most) return false
+        if ('min' in filter && value < filter.min) return false
+        if ('max' in filter && value > filter.max) return false
+
+        for (const prop of ['includes', 'contains']) {
+          if (prop in filter) {
+            if (Array.isArray(value) || typeof value === 'string') {
+              // "contains" is an alias of "includes", so read whichever was given.
+              if (!value.includes(filter[prop])) return false
+            } else {
+              console.warn(
+                `Value of tag "${tag}" is not an array or string, so passing ` +
+                `"${prop}" does not make sense.`
+              )
+              console.warn('...value:', value)
+              console.warn('...track:', track)
+              console.warn('...filter:', filter)
+              return false
+            }
+          }
+        }
+
+        if (filter.regex) {
+          if (typeof value === 'string') {
+            let re
+            try {
+              re = new RegExp(filter.regex)
+            } catch (error) {
+              console.warn('Invalid regular expression:', filter.regex)
+              console.warn('...error message:', error.message)
+              console.warn('...filter:', filter)
+              return false
+            }
+            if (!re.test(value)) return false
+          } else {
+            console.warn(
+              `Value of tag "${tag}" is not a string, so passing "regex" ` +
+              'does not make sense.'
+            )
+            console.warn('...value:', value)
+            console.warn('...track:', track)
+            console.warn('...filter:', filter)
+            return false
+          }
+        }
+      }
+
+      return true
+    }))
+
+    delete newItem.filters
+  }
+
+  // We pass true so that the playlist-format-updater knows that this
+  // is going to be the source playlist, probably.
+  return topItem ? updatePlaylistFormat(newItem, true) : newItem
+}
+
+module.exports = processSmartPlaylist
diff --git a/todo.txt b/todo.txt
index f682a1a..044a508 100644
--- a/todo.txt
+++ b/todo.txt
@@ -23,3 +23,5 @@ TODO: Pressing enter in the queue seems to not be doing the right thing?
       It should NOT move the selected item anywhere in the queue; it should
       just select and play that track.
       (Done!)
+
+TODO: iTunes downloader - test this.
-- 
cgit 1.3.0-6-gf8a5