From 9fcf30b57a7a8f0fc86f1187d2cad72e8eaaa37e Mon Sep 17 00:00:00 2001
From: liam4
Date: Sun, 4 Jun 2017 10:26:54 -0300
Subject: Local downloader

---
 package.json           |  6 ++--
 src/crawl-http.js      | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++
 src/crawl-local.js     | 41 +++++++++++++++++++++++
 src/crawl-recursive.js | 91 --------------------------------------------------
 todo.txt               |  1 +
 5 files changed, 137 insertions(+), 93 deletions(-)
 create mode 100755 src/crawl-http.js
 create mode 100644 src/crawl-local.js
 delete mode 100755 src/crawl-recursive.js

diff --git a/package.json b/package.json
index 86856d9..e0f2cae 100644
--- a/package.json
+++ b/package.json
@@ -4,11 +4,13 @@
   "main": "src/play.js",
   "scripts": {
     "play": "node src/play.js",
-    "crawl-recursive": "node src/crawl-recursive"
+    "crawl-http": "node src/crawl-http",
+    "crawl-local": "node src/crawl-local"
   },
   "bin": {
     "http-music": "./src/play.js",
-    "http-music-crawl-recursive": "./src/crawl-recursive.js"
+    "http-music-crawl-http": "./src/crawl-http.js",
+    "http-music-crawl-local": "./src/crawl-local.js"
   },
   "dependencies": {
     "cheerio": "^1.0.0-rc.1",
diff --git a/src/crawl-http.js b/src/crawl-http.js
new file mode 100755
index 0000000..189ba28
--- /dev/null
+++ b/src/crawl-http.js
@@ -0,0 +1,91 @@
+#!/usr/bin/env node
+
+'use strict'
+
+const MAX_DOWNLOAD_ATTEMPTS = 5
+
+const fetch = require('node-fetch')
+const $ = require('cheerio')
+
+function crawl(absURL, attempts = 0) {
+  // Recursively crawls a given URL, following every link to a deeper path and
+  // recording all links in a tree (in the same format playlists use). Makes
+  // multiple attempts to download failed paths.
+
+  return fetch(absURL)
+    .then(
+      res => res.text().then(text => {
+        const links = getHTMLLinks(text)
+        const verbose = process.argv.includes('--verbose')
+
+        return Promise.all(links.map(link => {
+          const [ title, href ] = link
+
+          if (href.endsWith('/')) {
+            // It's a directory!
+
+            if (verbose) console.log("[Dir] " + absURL + href)
+            return crawl(absURL + href)
+              .then(res => [title, res])
+          } else {
+            // It's a file!
+
+            if (verbose) console.log("[File] " + absURL + href)
+            return Promise.resolve([title, absURL + href])
+          }
+        }))
+      }),
+
+      err => {
+        console.warn("Failed to download: " + absURL)
+
+        if (attempts < MAX_DOWNLOAD_ATTEMPTS) {
+          console.warn(
+            "Trying again. Attempt " + (attempts + 1) +
+            "/" + MAX_DOWNLOAD_ATTEMPTS + "..."
+          )
+
+          return crawl(absURL, attempts + 1)
+        } else {
+          console.error(
+            "We've hit the download attempt limit (" +
+            MAX_DOWNLOAD_ATTEMPTS + "). Giving up on this path."
+          )
+
+          throw 'FAILED_DOWNLOAD'
+        }
+      }
+    )
+    .catch(error => {
+      if (error === 'FAILED_DOWNLOAD') {
+        // Debug logging for this is already handled above.
+        return []
+      } else {
+        throw error
+      }
+    })
+}
+
+function getHTMLLinks(text) {
+  // Never parse HTML with a regex!
+
+  return $(text).find('a').get().map(a => {
+    const $a = $(a)
+    return [$a.text(), $a.attr('href')]
+  })
+}
+
+if (process.argv.length === 2) {
+  console.log("Usage: http-music-crawl-http http://.../example/path/")
+  console.log("..or, npm run crawl-http -- http://.../example/path/")
+} else {
+  let url = process.argv[2]
+
+  if (!(url.endsWith('/'))) {
+    url = url + '/'
+  }
+
+  crawl(url)
+    .then(res => console.log(JSON.stringify(res, null, 2)))
+    .catch(err => console.error(err))
+}
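Both crawlers print a playlist tree to stdout: directories become [title, contents] groups and files become [title, target] tracks, serialized with JSON.stringify(res, null, 2). As a rough sketch of what crawl-http might emit for a small two-level server index (host, file names, and URLs are hypothetical):

[
  ["Cool Album/", [
    ["track-one.mp3", "http://example.com/music/Cool%20Album/track-one.mp3"],
    ["track-two.mp3", "http://example.com/music/Cool%20Album/track-two.mp3"]
  ]],
  ["lone-file.mp3", "http://example.com/music/lone-file.mp3"]
]

Titles are taken verbatim from the index page's link text (getHTMLLinks records $a.text()), so directory names typically keep their trailing slash.
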
diff --git a/src/crawl-local.js b/src/crawl-local.js
new file mode 100644
index 0000000..d9a9a70
--- /dev/null
+++ b/src/crawl-local.js
@@ -0,0 +1,41 @@
+#!/usr/bin/env node
+
+'use strict'
+
+const fs = require('fs')
+const path = require('path')
+
+const { promisify } = require('util')
+const readDir = promisify(fs.readdir)
+const stat = promisify(fs.stat)
+
+function crawl(dirPath) {
+  return readDir(dirPath).then(
+    res => Promise.all(res.map(item => {
+      const itemPath = path.join(dirPath, item)
+
+      return stat(itemPath).then(stats => {
+        if (stats.isDirectory()) {
+          return crawl(itemPath).then(contents => {
+            const group = [item, contents]
+            return group
+          })
+        } else if (stats.isFile()) {
+          const track = [item, itemPath]
+          return track
+        }
+      })
+    })
+  ))
+}
+
+if (process.argv.length === 2) {
+  console.log("Usage: http-music-crawl-local /example/path..")
+  console.log("..or, npm run crawl-local /example/path")
+} else {
+  const path = process.argv[2]
+
+  crawl(path)
+    .then(res => console.log(JSON.stringify(res, null, 2)))
+    .catch(err => console.error(err))
+}
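For context, a plausible end-to-end workflow with the new local crawler, assuming play.js still requires a literal playlist.json in the working directory (as the todo.txt entry later in this patch suggests); the music path here is hypothetical:

  $ node src/crawl-local.js /media/music > playlist.json
  $ node src/play.js

The same redirection works for crawl-http, but its --verbose progress messages go through console.log and would land in the captured playlist, so leave --verbose off when redirecting stdout.
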
diff --git a/src/crawl-recursive.js b/src/crawl-recursive.js
deleted file mode 100755
index 2656279..0000000
--- a/src/crawl-recursive.js
+++ /dev/null
@@ -1,91 +0,0 @@
-#!/usr/bin/env node
-
-'use strict'
-
-const MAX_DOWNLOAD_ATTEMPTS = 5
-
-const fetch = require('node-fetch')
-const $ = require('cheerio')
-
-function crawl(absURL, attempts = 0) {
-  // Recursively crawls a given URL, following every link to a deeper path and
-  // recording all links in a tree (in the same format playlists use). Makes
-  // multiple attempts to download failed paths.
-
-  return fetch(absURL)
-    .then(
-      res => res.text().then(text => {
-        const links = getHTMLLinks(text)
-        const verbose = process.argv.includes('--verbose')
-
-        return Promise.all(links.map(link => {
-          const [ title, href ] = link
-
-          if (href.endsWith('/')) {
-            // It's a directory!
-
-            if (verbose) console.log("[Dir] " + absURL + href)
-            return crawl(absURL + href)
-              .then(res => [title, res])
-          } else {
-            // It's a file!
-
-            if (verbose) console.log("[File] " + absURL + href)
-            return Promise.resolve([title, absURL + href])
-          }
-        }))
-      }),
-
-      err => {
-        console.warn("Failed to download: " + absURL)
-
-        if (attempts < MAX_DOWNLOAD_ATTEMPTS) {
-          console.warn(
-            "Trying again. Attempt " + (attempts + 1) +
-            "/" + MAX_DOWNLOAD_ATTEMPTS + "..."
-          )
-
-          return crawl(absURL, attempts + 1)
-        } else {
-          console.error(
-            "We've hit the download attempt limit (" +
-            MAX_DOWNLOAD_ATTEMPTS + "). Giving up on this path."
-          )
-
-          throw 'FAILED_DOWNLOAD'
-        }
-      }
-    )
-    .catch(error => {
-      if (error === 'FAILED_DOWNLOAD') {
-        // Debug logging for this is already handled above.
-        return []
-      } else {
-        throw error
-      }
-    })
-}
-
-function getHTMLLinks(text) {
-  // Never parse HTML with a regex!
-
-  return $(text).find('a').get().map(a => {
-    const $a = $(a)
-    return [$a.text(), $a.attr('href')]
-  })
-}
-
-if (process.argv.length === 2) {
-  console.log("Usage: http-music-crawl-recursive http://.../example/path/")
-  console.log("..or, npm run crawl-recursive -- http://...")
-} else {
-  let url = process.argv[2]
-
-  if (!(url.endsWith('/'))) {
-    url = url + '/'
-  }
-
-  crawl(url)
-    .then(res => console.log(JSON.stringify(res, null, 2)))
-    .catch(err => console.error(err))
-}
diff --git a/todo.txt b/todo.txt
index 53a5991..3197f92 100644
--- a/todo.txt
+++ b/todo.txt
@@ -76,6 +76,7 @@ TODO: Use NOT the internet as its source, so that it's a bit more
       general (Done!)
 
 TODO: Recursive local file playlist crawler.
+      (Done!)
 
 TODO: *Requiring* a literal `playlist.json` file doesn't seem quite right,
       especially since there's the `--open` option.
--
cgit 1.3.0-6-gf8a5