From 6f640a0b8e8e5b26a266f4680a626a629d3c7944 Mon Sep 17 00:00:00 2001
From: Florrie
Date: Fri, 5 Jan 2018 23:20:23 -0400
Subject: Support mpga in crawl-http

---
 src/crawl-http.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/crawl-http.js b/src/crawl-http.js
index d3e1533..9c7608e 100755
--- a/src/crawl-http.js
+++ b/src/crawl-http.js
@@ -21,7 +21,7 @@ function crawl(absURL, opts = {}, internals = {}) {
     keepSeparateHosts = false,
 
     keepAnyFileType = false,
-    fileTypes = ['wav', 'ogg', 'oga', 'mp3', 'mp4', 'm4a', 'mov'],
+    fileTypes = ['wav', 'ogg', 'oga', 'mp3', 'mp4', 'm4a', 'mov', 'mpga'],
 
     filterRegex = null
   } = opts
--
cgit 1.3.0-6-gf8a5

From 64bcc2930392d70437dc5bc8b2f078840d8998a9 Mon Sep 17 00:00:00 2001
From: Florrie
Date: Sat, 27 Jan 2018 00:23:21 -0400
Subject: Various improvements to crawl-http

Names are now trimmed. You shouldn't see " Vim!" anymore - just "Vim!".

.MOD files are considered to be music.

The crawler will try to avoid going out of whatever directory was passed
to it.

---
 src/crawl-http.js | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/src/crawl-http.js b/src/crawl-http.js
index 9c7608e..ae38ca4 100755
--- a/src/crawl-http.js
+++ b/src/crawl-http.js
@@ -19,9 +19,10 @@ function crawl(absURL, opts = {}, internals = {}) {
     maxAttempts = 5,
 
     keepSeparateHosts = false,
+    stayInSameDirectory = true,
 
     keepAnyFileType = false,
-    fileTypes = ['wav', 'ogg', 'oga', 'mp3', 'mp4', 'm4a', 'mov', 'mpga'],
+    fileTypes = ['wav', 'ogg', 'oga', 'mp3', 'mp4', 'm4a', 'mov', 'mpga', 'mod'],
 
     filterRegex = null
   } = opts
@@ -56,7 +57,9 @@ function crawl(absURL, opts = {}, internals = {}) {
             name = name.slice(0, -1)
           }
 
-          const urlObj = new url.URL(href, absURL)
+          name = name.trim()
+
+          const urlObj = new url.URL(href, absURL + '/')
           const linkURL = url.format(urlObj)
 
           if (internals.allURLs.includes(linkURL)) {
@@ -79,6 +82,14 @@ function crawl(absURL, opts = {}, internals = {}) {
             return false
           }
 
+          if (stayInSameDirectory) {
+            const relative = path.relative(absURLObj.pathname, urlObj.pathname)
+            if (relative.startsWith('..') || path.isAbsolute(relative)) {
+              verboseLog("[Ignored] Outside of parent directory: " + linkURL)
+              return false
+            }
+          }
+
           if (href.endsWith('/')) {
             // It's a directory!
 
--
cgit 1.3.0-6-gf8a5
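The directory-containment test that patch introduces can be tried on its
own. Here is a minimal sketch, assuming Node's built-in url and path
modules; the staysInDirectory helper and the example URLs are made up for
illustration and are not part of crawl-http, which does this inline:

    // Sketch of the stayInSameDirectory check from the patch above.
    const url = require('url')
    const path = require('path')

    function staysInDirectory(absURL, href) {
      // Resolve the link against the base URL, adding a trailing slash
      // the same way the patch does in new url.URL(href, absURL + '/').
      const absURLObj = new url.URL(absURL + '/')
      const urlObj = new url.URL(href, absURL + '/')

      // If the resolved path is only reachable by going up ('..') or by
      // an absolute jump, the link points outside the starting directory.
      const relative = path.relative(absURLObj.pathname, urlObj.pathname)
      return !(relative.startsWith('..') || path.isAbsolute(relative))
    }

    console.log(staysInDirectory('http://example.com/music', 'album/track.mp3'))
    // true - /music/album/track.mp3 is under /music/
    console.log(staysInDirectory('http://example.com/music', '../other/track.mp3'))
    // false - the link escapes to /other/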
From 60d4ac4b28eee349070ad0930330654e2d67e27d Mon Sep 17 00:00:00 2001
From: Florrie
Date: Sat, 27 Jan 2018 00:39:22 -0400
Subject: Make crawl-http go through one directory at a time

Hopefully this makes the tool, like, less of an unintentional
denial-of-service.

---
 src/crawl-http.js | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

diff --git a/src/crawl-http.js b/src/crawl-http.js
index ae38ca4..b40ed02 100755
--- a/src/crawl-http.js
+++ b/src/crawl-http.js
@@ -44,10 +44,12 @@ function crawl(absURL, opts = {}, internals = {}) {
 
   return fetch(absURL)
     .then(
-      res => res.text().then(text => {
+      res => res.text().then(async text => {
         const links = getHTMLLinks(text)
 
-        return Promise.all(links.map(link => {
+        const items = []
+
+        for (const link of links) {
           let [ name, href ] = link
 
           // If the name (that's the content inside of <a>..</a>) ends with a
@@ -64,29 +66,26 @@ function crawl(absURL, opts = {}, internals = {}) {
 
           if (internals.allURLs.includes(linkURL)) {
             verboseLog("[Ignored] Already done this URL: " + linkURL)
-
-            return false
+            continue
           }
 
           internals.allURLs.push(linkURL)
 
           if (filterRegex && !(filterRegex.test(linkURL))) {
             verboseLog("[Ignored] Failed regex: " + linkURL)
-
-            return false
+            continue
           }
 
           if (!keepSeparateHosts && urlObj.host !== absURLObj.host) {
             verboseLog("[Ignored] Inconsistent host: " + linkURL)
-
-            return false
+            continue
           }
 
           if (stayInSameDirectory) {
             const relative = path.relative(absURLObj.pathname, urlObj.pathname)
             if (relative.startsWith('..') || path.isAbsolute(relative)) {
               verboseLog("[Ignored] Outside of parent directory: " + linkURL)
-              return false
+              continue
             }
           }
 
@@ -95,8 +94,10 @@ function crawl(absURL, opts = {}, internals = {}) {
 
             verboseLog("[Dir] " + linkURL)
 
-            return crawl(linkURL, opts, Object.assign({}, internals))
-              .then(({ items }) => ({name, items}))
+            items.push(await (
+              crawl(linkURL, opts, Object.assign({}, internals))
+                .then(({ items }) => ({name, items}))
+            ))
           } else {
             // It's a file!
 
@@ -107,14 +108,15 @@ function crawl(absURL, opts = {}, internals = {}) {
               !(extensions.includes(path.extname(href)))
             ) {
               verboseLog("[Ignored] Bad extension: " + linkURL)
-
-              return false
+              continue
             }
 
             verboseLog("[File] " + linkURL)
 
-            return Promise.resolve({name, downloaderArg: linkURL})
+            items.push({name, downloaderArg: linkURL})
           }
-        }).filter(Boolean)).then(items => ({items}))
+        }
+
+        return {items}
       }),
       err => {
--
cgit 1.3.0-6-gf8a5

From fe65f1777f130ec9d61c5ce06532a551b5dcc899 Mon Sep 17 00:00:00 2001
From: Florrie
Date: Mon, 12 Feb 2018 19:28:49 -0400
Subject: Make it reasonable to have crawl-http save to a file while verbosely logging

---
 src/crawl-http.js | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/crawl-http.js b/src/crawl-http.js
index b40ed02..5a4932d 100755
--- a/src/crawl-http.js
+++ b/src/crawl-http.js
@@ -36,7 +36,7 @@ function crawl(absURL, opts = {}, internals = {}) {
 
   const verboseLog = text => {
     if (verbose) {
-      console.log(text)
+      console.error(text)
     }
   }
 
@@ -203,7 +203,10 @@ async function main(args, shouldReturn = false) {
       // such. Defaults to false.
 
       verbose = true
-      console.log('Outputting verbosely.')
+      console.error(
+        'Outputting verbosely. (Log output goes to STDERR - ' +
+        'you can still pipe to a file to save your playlist.)'
+      )
     },
 
     'v': util => util.alias('-verbose'),
--
cgit 1.3.0-6-gf8a5
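That last change is easy to see in isolation. A minimal sketch of the
stdout/stderr split, assuming Node; sketch.js and playlist.json are
made-up names for illustration:

    // Sketch of why verboseLog switched to console.error: diagnostics
    // go to STDERR while the playlist goes to STDOUT, so a shell
    // redirect can separate the two streams. Data below is made up.
    const verboseLog = text => console.error(text) // -> STDERR

    verboseLog('[File] http://example.com/music/track.mp3')
    console.log(JSON.stringify({items: [{name: 'track.mp3'}]}, null, 2)) // -> STDOUT

Run as node sketch.js > playlist.json, the "[File]" line stays visible on
the terminal and only the JSON lands in playlist.json - the same split
that lets crawl-http log verbosely while its playlist is piped to a file.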