From 7572a9f46ff1438d4199306077c1dacd1233176a Mon Sep 17 00:00:00 2001
From: liam4
Date: Mon, 29 May 2017 20:12:33 +0000
Subject: General update

---
 crawl-itunes.js | 76 ++++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 57 insertions(+), 19 deletions(-)

diff --git a/crawl-itunes.js b/crawl-itunes.js
index 7a98be9..3c0f3f7 100644
--- a/crawl-itunes.js
+++ b/crawl-itunes.js
@@ -1,12 +1,13 @@
 const fetch = require('node-fetch')
 
+const MAX_DOWNLOAD_ATTEMPTS = 5
+
 function parseDirectoryListing(text) {
   // Matches all links in a directory listing.
   // Returns an array where each item is in the format [href, label].
 
   if (!(text.includes('Directory listing for'))) {
-    console.warn("Not a directory listing! Crawl returning empty array.")
-    return []
+    throw 'NOT_DIRECTORY_LISTING'
   }
 
   const regex = /<a href="([^"]*)">([^>]*)<\/a>/g
@@ -18,28 +19,65 @@ function parseDirectoryListing(text) {
   return output
 }
 
-function crawl(absURL) {
+function crawl(absURL, attempts = 0) {
   return fetch(absURL)
-    .then(res => res.text(), err => {
-      console.warn('FAILED: ' + absURL)
-      return 'Oops'
+    .then(res => res.text().then(text => playlistifyParse(text, absURL)), err => {
+      console.error('Failed to download: ' + absURL)
+
+      if (attempts < MAX_DOWNLOAD_ATTEMPTS) {
+        console.error(
+          'Trying again. Attempt ' + (attempts + 1) +
+          '/' + MAX_DOWNLOAD_ATTEMPTS + '...'
+        )
+        return crawl(absURL, attempts + 1)
+      } else {
+        console.error(
+          'We\'ve hit the download attempt limit (' +
+          MAX_DOWNLOAD_ATTEMPTS + '). Giving up on ' +
+          'this path.'
+        )
+        throw 'FAILED_DOWNLOAD'
+      }
+    })
+    .catch(error => {
+      if (error === 'FAILED_DOWNLOAD') {
+        // Debug logging for this is already handled above.
+        return []
+      } else {
+        throw error
+      }
     })
-    .then(text => parseDirectoryListing(text))
-    .then(links => Promise.all(links.map(link => {
-      const [ href, title ] = link
+}
 
-      if (href.endsWith('/')) {
-        // It's a directory!
+function playlistifyParse(text, absURL) {
+  const links = parseDirectoryListing(text)
+  return Promise.all(links.map(link => {
+    const [ href, title ] = link
 
-        console.log('[Dir] ' + absURL + href)
-        return crawl(absURL + href).then(res => [title, res])
-      } else {
-        // It's a file!
+    const verbose = process.argv.includes('--verbose')
 
-        console.log('[File] ' + absURL + href)
-        return Promise.resolve([title, absURL + href])
-      }
-    })))
+    if (href.endsWith('/')) {
+      // It's a directory!
+
+      if (verbose) console.log('[Dir] ' + absURL + href)
+      return crawl(absURL + href)
+        .then(res => [title, res])
+        .catch(error => {
+          if (error === 'NOT_DIRECTORY_LISTING') {
+            console.error('Not a directory listing: ' + absURL)
+            return []
+          } else {
+            throw error
+          }
+        })
+    } else {
+      // It's a file!
+
+      if (verbose) console.log('[File] ' + absURL + href)
+      return Promise.resolve([title, absURL + href])
+    }
+  })).catch(error => {
+  })
 }
 
 crawl('http://192.168.2.19:1233/')
--
cgit 1.3.0-6-gf8a5
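
The main behavioural change in this commit is that a failed download no longer poisons the whole crawl: crawl() now retries the fetch up to MAX_DOWNLOAD_ATTEMPTS times and only then gives up on that path, resolving it to an empty result. Below is a rough standalone sketch of that retry pattern, not the patch's exact code; the download() helper name is illustrative, while MAX_DOWNLOAD_ATTEMPTS and the sample URL are taken from the script.

const fetch = require('node-fetch')

const MAX_DOWNLOAD_ATTEMPTS = 5

// Fetch a URL, retrying on failure. Once the attempt limit is reached the
// promise rejects, so the caller can substitute a fallback value (the patch
// substitutes an empty array for the failed path).
function download(absURL, attempts = 0) {
  return fetch(absURL)
    .then(res => res.text())
    .catch(err => {
      if (attempts < MAX_DOWNLOAD_ATTEMPTS) {
        console.error(
          'Retrying ' + absURL + ' (attempt ' + (attempts + 1) +
          '/' + MAX_DOWNLOAD_ATTEMPTS + ')'
        )
        return download(absURL, attempts + 1)
      }
      throw new Error('FAILED_DOWNLOAD')
    })
}

download('http://192.168.2.19:1233/')
  .then(text => console.log(text.length + ' characters downloaded'))
  .catch(() => console.error('Giving up on this path.'))

Unlike the patch, which passes its error handler as the second argument to .then() and so only reacts to fetch() rejections, this sketch uses a single .catch(), so a failure while reading the response body is retried as well.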