From 8107e3f49ef959061d7fe0e04ef2f6eda01df354 Mon Sep 17 00:00:00 2001 From: liam4 Date: Wed, 21 Jun 2017 19:04:48 +0000 Subject: Ugh!! --- src/download-playlist.js | 95 ++++++++++++++++++++++++++++++++++++++++++++++++ src/downloaders.js | 22 ++++++++++- src/playlist-utils.js | 8 +++- 3 files changed, 123 insertions(+), 2 deletions(-) create mode 100644 src/download-playlist.js diff --git a/src/download-playlist.js b/src/download-playlist.js new file mode 100644 index 0000000..eb6375a --- /dev/null +++ b/src/download-playlist.js @@ -0,0 +1,95 @@ +'use strict' + +const fs = require('fs') +const downloaders = require('./downloaders') +const path = require('path') +const sanitize = require('sanitize-filename') + +const { + isGroup, isTrack +} = require('./playlist-utils') + +const { promisify } = require('util') + +const access = promisify(fs.access) +const mkdir = promisify(fs.mkdir) +const readFile = promisify(fs.readFile) +const readdir = promisify(fs.readdir) +const rename = promisify(fs.rename) +const stat = promisify(fs.stat) +const writeFile = promisify(fs.writeFile) + +async function downloadCrawl(playlist, downloader, outPath = './out/') { + let doesExist = true + try { + doesExist = (await stat(outPath)).isDirectory() + } catch(err) { + doesExist = false + } + + if (!doesExist) { + await mkdir(outPath) + } + + return Promise.all(playlist.map(async (item) => { + if (isGroup(item)) { + // TODO: Not sure if this is the best way to pick the next out dir. + const out = outPath + sanitize(item[0]) + '/' + + return [item[0], await downloadCrawl(item[1], downloader, out)] + } else if (isTrack(item)) { + console.log(`\x1b[2m${item[0]} - ${item[1]}\x1b[0m`) + + // TODO: How to deal with songs that don't have an extension? + const ext = path.extname(item[1]) + const base = path.basename(item[1], ext) + + const items = await readdir(outPath) + const match = items.find(x => path.basename(x, path.extname(x)) === base) + if (match) { + return [item[0], outPath + match] + } + + const downloadFile = await downloader(item[1]) + // const base = path.basename(downloadFile) + // const out = outPath + base + + // console.log(`\x1b[1m${downloadFile}\x1b[0m`) + + try { + await rename(downloadFile, path.resolve(out)) + console.log(`\x1b[1m${out}\x1b[0m`) + return [item[0], out] + } catch(err) { + console.error(`\x1b[31mFAILED: ${out}\x1b[0m`) + console.error(err) + return false + } + } + })).then(p => p.filter(Boolean)) +} + +async function main() { + // TODO: Implement command line stuff here + + if (process.argv.length === 2) { + console.error('Usage: download-playlist [opts]') + process.exit(1) + return + } + + const playlist = JSON.parse(await readFile(process.argv[2])) + + const dl = downloaders.makePowerfulDownloader( + downloaders.makeHTTPDownloader() + ) + + const outPlaylist = await downloadCrawl(playlist, dl) + + writeFile('out/playlist.json', JSON.stringify(outPlaylist, null, 2)) + + console.log('Done - saved playlist to out/playlist.json.') +} + +main() + .catch(err => console.error(err)) diff --git a/src/downloaders.js b/src/downloaders.js index fa1f337..2b193eb 100644 --- a/src/downloaders.js +++ b/src/downloaders.js @@ -1,3 +1,5 @@ +'use strict' + const fs = require('fs') const fetch = require('node-fetch') const promisifyProcess = require('./promisify-process') @@ -47,8 +49,26 @@ function makeLocalDownloader() { } } +function makePowerfulDownloader(downloader, maxAttempts = 5) { + // This should totally be named better.. 
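+  // (The returned function retries the wrapped downloader whenever it
+  // rejects, re-throwing the last error once maxAttempts extra attempts
+  // have been used up.)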
+ + return async function recursive(arg, attempts = 0) { + try { + return await downloader(arg) + } catch(err) { + if (attempts < maxAttempts) { + console.warn('Failed - attempting again:', arg) + return await recursive(arg, attempts + 1) + } else { + throw err + } + } + } +} + module.exports = { makeHTTPDownloader, makeYouTubeDownloader, - makeLocalDownloader + makeLocalDownloader, + makePowerfulDownloader } diff --git a/src/playlist-utils.js b/src/playlist-utils.js index ff19ea9..13c6003 100644 --- a/src/playlist-utils.js +++ b/src/playlist-utils.js @@ -130,6 +130,7 @@ function parsePathString(pathString) { return pathParts } +// TODO: Are these two functions actually useful?? function getGroupTitle(group) { return group[0] } @@ -142,11 +143,16 @@ function isGroup(array) { return Array.isArray(array[1]) } +function isTrack(array) { + return typeof array[1] === 'string' +} + module.exports = { flattenPlaylist, filterPlaylistByPathString, filterPlaylistByPath, removeGroupByPathString, removeGroupByPath, getPlaylistTreeString, parsePathString, - getGroupTitle, getGroupContents + getGroupTitle, getGroupContents, + isGroup, isTrack } -- cgit 1.3.0-6-gf8a5 From 84a48215f19d271cfce88439a1300817f0666e85 Mon Sep 17 00:00:00 2001 From: liam4 Date: Wed, 21 Jun 2017 17:10:26 -0300 Subject: Various updates.. maybe finished?? --- package.json | 1 + src/crawl-http.js | 4 +-- src/download-playlist.js | 90 ++++++++++++++++++++++++++++++++++++++++-------- src/downloaders.js | 14 +++++++- src/http-music.js | 13 ++----- yarn.lock | 4 +++ 6 files changed, 97 insertions(+), 29 deletions(-) diff --git a/package.json b/package.json index 4dae458..f63376a 100644 --- a/package.json +++ b/package.json @@ -18,6 +18,7 @@ ], "dependencies": { "cheerio": "^1.0.0-rc.1", + "ncp": "^2.0.0", "node-fetch": "^1.7.0", "node-natural-sort": "^0.8.6", "sanitize-filename": "^1.6.1", diff --git a/src/crawl-http.js b/src/crawl-http.js index 7f51c87..020506b 100755 --- a/src/crawl-http.js +++ b/src/crawl-http.js @@ -97,9 +97,9 @@ function crawl(absURL, opts = {}, internals = {}) { err => { console.warn("Failed to download: " + absURL) - if (attempts < maxAttempts) { + if (internals.attempts < maxAttempts) { console.warn( - `Trying again. Attempt ${attempts + 1}/${maxAttempts}...` + `Trying again. Attempt ${internals.attempts + 1}/${maxAttempts}...` ) return crawl(absURL, opts, Object.assign({}, internals, { diff --git a/src/download-playlist.js b/src/download-playlist.js index eb6375a..bb6b86c 100644 --- a/src/download-playlist.js +++ b/src/download-playlist.js @@ -3,6 +3,7 @@ const fs = require('fs') const downloaders = require('./downloaders') const path = require('path') +const processArgv = require('./process-argv') const sanitize = require('sanitize-filename') const { @@ -15,11 +16,55 @@ const access = promisify(fs.access) const mkdir = promisify(fs.mkdir) const readFile = promisify(fs.readFile) const readdir = promisify(fs.readdir) -const rename = promisify(fs.rename) const stat = promisify(fs.stat) const writeFile = promisify(fs.writeFile) +const ncp = promisify(require('ncp').ncp) + +// It's typically bad to attempt to download or copy a million files at once, +// so we create a "promise delayer" that forces only several promises to run at +// at one time. 
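+//
+// Usage sketch (illustrative only -- `someDownloader` stands in for any of
+// the downloader functions from downloaders.js):
+//
+//   const file = await delayPromise(() => someDownloader(url))
+//
+// The call is queued, and only actually starts once fewer than MAX tasks
+// are currently running; the returned promise settles with the result of
+// the wrapped call.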
+let delayPromise +{ + const INTERVAL = 50 + const MAX = 5 + + let active = 0 + + let queue = [] + + delayPromise = function(promiseMaker) { + return new Promise((resolve, reject) => { + queue.push([promiseMaker, resolve, reject]) + }) + } + + setInterval(async () => { + if (active >= MAX) { + return + } + + const top = queue.pop() + + if (top) { + const [ promiseMaker, resolve, reject ] = top + + active++ + + console.log('Going - queue: ' + queue.length) + + try { + resolve(await promiseMaker()) + } catch(err) { + reject(err) + } + + active-- + } + }, INTERVAL) +} async function downloadCrawl(playlist, downloader, outPath = './out/') { + // If the output folder doesn't exist, we should create it. let doesExist = true try { doesExist = (await stat(outPath)).isDirectory() @@ -38,30 +83,37 @@ async function downloadCrawl(playlist, downloader, outPath = './out/') { return [item[0], await downloadCrawl(item[1], downloader, out)] } else if (isTrack(item)) { - console.log(`\x1b[2m${item[0]} - ${item[1]}\x1b[0m`) - - // TODO: How to deal with songs that don't have an extension? + // TODO: How should we deal with songs that don't have an extension? const ext = path.extname(item[1]) const base = path.basename(item[1], ext) - + const out = outPath + base + ext + + // If we've already downloaded a file at some point in previous time, + // there's no need to download it again! + // + // Since we can't guarantee the extension name of the file, we only + // compare bases. + // + // TODO: This probably doesn't work well with things like the YouTube + // downloader. const items = await readdir(outPath) const match = items.find(x => path.basename(x, path.extname(x)) === base) if (match) { + console.log(`\x1b[32;2mAlready downloaded: ${out}\x1b[0m`) return [item[0], outPath + match] } - const downloadFile = await downloader(item[1]) - // const base = path.basename(downloadFile) - // const out = outPath + base + console.log(`\x1b[2mDownloading: ${item[0]} - ${item[1]}\x1b[0m`) - // console.log(`\x1b[1m${downloadFile}\x1b[0m`) + const downloadFile = await delayPromise(() => downloader(item[1])) + // console.log(downloadFile, path.resolve(out)) try { - await rename(downloadFile, path.resolve(out)) - console.log(`\x1b[1m${out}\x1b[0m`) + await delayPromise(() => ncp(downloadFile, path.resolve(out))) + console.log(`\x1b[32;1mDownloaded: ${out}\x1b[0m`) return [item[0], out] } catch(err) { - console.error(`\x1b[31mFAILED: ${out}\x1b[0m`) + console.error(`\x1b[31mFailed: ${out}\x1b[0m`) console.error(err) return false } @@ -74,21 +126,29 @@ async function main() { if (process.argv.length === 2) { console.error('Usage: download-playlist [opts]') - process.exit(1) return } const playlist = JSON.parse(await readFile(process.argv[2])) + let downloaderType = 'http' + + processArgv(process.argv.slice(3), { + '-downloader': util => { + downloaderType = util.nextArg() + } + }) + const dl = downloaders.makePowerfulDownloader( - downloaders.makeHTTPDownloader() + downloaders.getDownloader(downloaderType) ) const outPlaylist = await downloadCrawl(playlist, dl) - writeFile('out/playlist.json', JSON.stringify(outPlaylist, null, 2)) + await writeFile('out/playlist.json', JSON.stringify(outPlaylist, null, 2)) console.log('Done - saved playlist to out/playlist.json.') + process.exit(0) } main() diff --git a/src/downloaders.js b/src/downloaders.js index 2b193eb..8fa830c 100644 --- a/src/downloaders.js +++ b/src/downloaders.js @@ -70,5 +70,17 @@ module.exports = { makeHTTPDownloader, makeYouTubeDownloader, makeLocalDownloader, - 
makePowerfulDownloader + makePowerfulDownloader, + + getDownloader: downloaderType => { + if (downloaderType === 'http') { + return makeHTTPDownloader() + } else if (downloaderType === 'youtube') { + return makeYouTubeDownloader() + } else if (downloaderType === 'local') { + return makeLocalDownloader() + } else { + return null + } + } } diff --git a/src/http-music.js b/src/http-music.js index ed79878..68bfa77 100755 --- a/src/http-music.js +++ b/src/http-music.js @@ -224,17 +224,8 @@ setupDefaultPlaylist('./playlist.json') return } - let downloader - if (downloaderType === 'http') { - console.log("Using HTTP downloader.") - downloader = downloaders.makeHTTPDownloader() - } else if (downloaderType === 'youtube') { - console.log("Using YouTube downloader.") - downloader = downloaders.makeYouTubeDownloader() - } else if (downloaderType === 'local') { - console.log("Using local file downloader.") - downloader = downloaders.makeLocalDownloader() - } else { + let downloader = downloaders.getDownloader(downloaderType) + if (!downloader) { console.error("Invalid downloader type: " + downloaderType) return } diff --git a/yarn.lock b/yarn.lock index d459bde..dbeda92 100644 --- a/yarn.lock +++ b/yarn.lock @@ -113,6 +113,10 @@ lodash@^4.15.0: version "4.17.4" resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.4.tgz#78203a4d1c328ae1d86dca6460e369b57f4055ae" +ncp: + version "2.0.0" + resolved "https://registry.yarnpkg.com/ncp/-/ncp-2.0.0.tgz#195a21d6c46e361d2fb1281ba38b91e9df7bdbb3" + node-fetch@^1.7.0: version "1.7.0" resolved "https://registry.yarnpkg.com/node-fetch/-/node-fetch-1.7.0.tgz#3ff6c56544f9b7fb00682338bb55ee6f54a8a0ef" -- cgit 1.3.0-6-gf8a5 From 5cbda3bc6c678a9b212fc1b47d94a3b234bbf818 Mon Sep 17 00:00:00 2001 From: liam4 Date: Wed, 21 Jun 2017 21:50:28 +0000 Subject: Ignore out/ folder --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 077fc6b..5777e07 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ .DS_Store node_modules /*.json +/out -- cgit 1.3.0-6-gf8a5
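
A minimal sketch of how the helpers added in these patches fit together, assuming the modules are loaded from ./src as in this repository; the 'http' downloader type here is only an example choice:

    'use strict'

    const downloaders = require('./downloaders')

    // Pick a base downloader by type ('http', 'youtube', or 'local'), then
    // wrap it so failed downloads are retried several times before giving up.
    const dl = downloaders.makePowerfulDownloader(
      downloaders.getDownloader('http')
    )

    // dl(url) resolves to the path of the downloaded file, which is what
    // download-playlist.js expects when it copies the result into out/.

download-playlist.js then walks the playlist tree with such a downloader, throttled through delayPromise, and writes the rewritten playlist to out/playlist.json.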