From 8a95e5f61792748ab9bebe5fc7e551c66a178b2d Mon Sep 17 00:00:00 2001 From: Liam Date: Fri, 16 Jun 2017 21:25:59 -0300 Subject: Make max crawl-http download attempts settable by user --- man/http-music-crawl-http.1 | 78 +++++++++++++++++++++++++++++++++++++++++++++ src/crawl-http.js | 49 ++++++++++++++++++---------- todo.txt | 1 + 3 files changed, 112 insertions(+), 16 deletions(-) create mode 100644 man/http-music-crawl-http.1 diff --git a/man/http-music-crawl-http.1 b/man/http-music-crawl-http.1 new file mode 100644 index 0000000..949d682 --- /dev/null +++ b/man/http-music-crawl-http.1 @@ -0,0 +1,78 @@ +.TH http-music-crawl-http 1 + + +.SH NAME +http-music-crawl-http - create a playlist file using an HTTP-based directory listing + + +.SH SYNOPSIS +.B http-music-crawl-http +\fIdownloadURL\fR +[opts...] + + +.SH DESCRIPTION +\fBhttp-music-crawl-http\fR is a (convolutedly-named) command line utility used to generate playlist files for \fBhttp-music\fR by crawling the directory listing found at a given URL. +http-music uses playlist files as its source of music; without a playlist file, the program simply does not know what to play! + +.PP +The resulting playlist file is structured as a tree that represents the path that the crawler follows. 
+For instance, if the links of a directory listing give the following tree: + +.PP +.nf +.RS +http://example.com/ + Rainbows/ + Sunlight.mp3 + Rainbows.mp3 + Pineapples.mp3 + Cool Author 72/ + Good Album/ + Hello world!.mp3 + Bad News/ + Bad News - Single/ + Bad News.mp3 + Irony/ + Rahhhh!!.mp3 +.RE +.fi + +.PP +\[char46]\[char46]then the following playlist file is generated: + +.PP +.nf +.RS +[ + ['Rainbows', [ + ['Sunlight', 'http://example.com/Rainbows/Sunlight.mp3'], + ['Rainbows', 'http://example.com/Rainbows/Rainbows.mp3'], + ['Pineapples', 'http://example.com/Rainbows/Pineapples.mp3'] + ]], + ['Cool Author 72', [ + ['Good Album', [ + ['Hello World!', 'http://example.com/Cool%20Author%2072/Good%20Album/Hello%20World.mp3'], + ]] + ]], + ['Bad News', [ + ['Bad News - Single', [ + ['Bad News', 'http://example.com/Bad%20News/Bad%20News%20-%20Single/Bad%20News.mp3'] + ]], + ['Irony', [ + ['Rahhhh!!', 'http://example.com/Bad%20News/Irony/Rahhhh!!.mp3'] + ]] + ]] +] +.RE +.fi + +.PP +As you can see, the resulting playlist file follows the same structure as the directory listing. + + +.SH OPTIONS +.TP +.BR -m ", " --max-download-attempts +Sets the maximum number of times any single directory will be attempted to be downloaded, when the HTTP download request fails. +Defaults to 5. diff --git a/src/crawl-http.js b/src/crawl-http.js index fa078e3..05685e4 100755 --- a/src/crawl-http.js +++ b/src/crawl-http.js @@ -2,14 +2,13 @@ 'use strict' -const MAX_DOWNLOAD_ATTEMPTS = 5 - const fetch = require('node-fetch') const $ = require('cheerio') const url = require('url') const path = require('path') +const processArgv = require('./process-argv') -function crawl(absURL, attempts = 0) { +function crawl(absURL, maxAttempts = 5, attempts = 0) { // Recursively crawls a given URL, following every link to a deeper path and // recording all links in a tree (in the same format playlists use). Makes // multiple attempts to download failed paths. 
@@ -28,7 +27,7 @@ function crawl(absURL, attempts = 0) { // It's a directory! if (verbose) console.log("[Dir] " + linkURL) - return crawl(linkURL) + return crawl(linkURL, maxAttempts) .then(res => [title, res]) } else { // It's a file! @@ -42,17 +41,16 @@ function crawl(absURL, attempts = 0) { err => { console.warn("Failed to download: " + absURL) - if (attempts < MAX_DOWNLOAD_ATTEMPTS) { + if (attempts < maxAttempts) { console.warn( - "Trying again. Attempt " + (attempts + 1) + - "/" + MAX_DOWNLOAD_ATTEMPTS + "..." + `Trying again. Attempt ${attempts + 1}/${maxAttempts}...` ) - return crawl(absURL, attempts + 1) + return crawl(absURL, maxAttempts, attempts + 1) } else { console.error( - "We've hit the download attempt limit (" + - MAX_DOWNLOAD_ATTEMPTS + "). Giving up on this path." + "We've hit the download attempt limit (" + maxAttempts + "). " + + "Giving up on this path." ) throw 'FAILED_DOWNLOAD' @@ -78,13 +76,32 @@ function getHTMLLinks(text) { }) } -if (process.argv.length === 2) { - console.log("Usage: http-music-crawl-http http://.../example/path/") - console.log("..or, npm run crawl-http -- http://.../example/path/") -} else { +async function main() { let url = process.argv[2] - crawl(url) - .then(res => console.log(JSON.stringify(res, null, 2))) + let maxDownloadAttempts = 5 + + await processArgv(process.argv.slice(3), { + '-max-download-attempts': function(util) { + // --max-download-attempts (alias: -m) + // Sets the maximum number of times to attempt downloading the index for + // any one directory. Defaults to 5. 
+
+      maxDownloadAttempts = parseInt(util.nextArg())
+      console.log(maxDownloadAttempts)
+    },
+
+    'm': util => util.alias('-max-download-attempts')
+  })
+
+  const downloadedPlaylist = await crawl(url, maxDownloadAttempts)
+
+  console.log(JSON.stringify(downloadedPlaylist, null, 2))
+}
+
+if (process.argv.length === 2) {
+  console.log("Usage: http-music-crawl-http http://.../example/path/ [opts]")
+} else {
+  main()
     .catch(err => console.error(err))
 }
diff --git a/todo.txt b/todo.txt
index 7620dba..1ad2215 100644
--- a/todo.txt
+++ b/todo.txt
@@ -139,3 +139,4 @@ TODO: Figure out why written track files (when using HTTP downloader) are
 
 TODO: Make max download attempts variable by the user (without requiring
       source editing, obviously).
+      (Done!)
-- 
cgit 1.3.0-6-gf8a5