« get me outta code hell

Make crawl-http go through one directory at a time - http-music - Command-line music player + utils (not a server!)
about summary refs log tree commit diff
diff options
context:
space:
mode:
author: Florrie <towerofnix@gmail.com>, 2018-01-27 00:39:22 -0400
committer: Florrie <towerofnix@gmail.com>, 2018-01-27 00:39:24 -0400
commit: 60d4ac4b28eee349070ad0930330654e2d67e27d (patch)
tree: 3f52a7447fa43fe3f809c79f798c4edd8d6dc288
parent: 64bcc2930392d70437dc5bc8b2f078840d8998a9 (diff)
Make crawl-http go through one directory at a time
Hopefully this makes the tool, like, less of an unintentional
denial-of-service.
-rwxr-xr-x  src/crawl-http.js  32
1 file changed, 17 insertions, 15 deletions
diff --git a/src/crawl-http.js b/src/crawl-http.js
index ae38ca4..b40ed02 100755
--- a/src/crawl-http.js
+++ b/src/crawl-http.js
@@ -44,10 +44,12 @@ function crawl(absURL, opts = {}, internals = {}) {
 
   return fetch(absURL)
     .then(
-      res => res.text().then(text => {
+      res => res.text().then(async text => {
         const links = getHTMLLinks(text)
 
-        return Promise.all(links.map(link => {
+        const items = []
+
+        for (const link of links) {
           let [ name, href ] = link
 
           // If the name (that's the content inside of <a>..</a>) ends with a
@@ -64,29 +66,26 @@ function crawl(absURL, opts = {}, internals = {}) {
 
           if (internals.allURLs.includes(linkURL)) {
             verboseLog("[Ignored] Already done this URL: " + linkURL)
-
-            return false
+            continue
           }
 
           internals.allURLs.push(linkURL)
 
           if (filterRegex && !(filterRegex.test(linkURL))) {
             verboseLog("[Ignored] Failed regex: " + linkURL)
-
-            return false
+            continue
           }
 
           if (!keepSeparateHosts && urlObj.host !== absURLObj.host) {
             verboseLog("[Ignored] Inconsistent host: " + linkURL)
-
-            return false
+            continue
           }
 
           if (stayInSameDirectory) {
             const relative = path.relative(absURLObj.pathname, urlObj.pathname)
             if (relative.startsWith('..') || path.isAbsolute(relative)) {
               verboseLog("[Ignored] Outside of parent directory: " + linkURL)
-              return false
+              continue
             }
           }
 
@@ -95,8 +94,10 @@ function crawl(absURL, opts = {}, internals = {}) {
 
             verboseLog("[Dir] " + linkURL)
 
-            return crawl(linkURL, opts, Object.assign({}, internals))
-              .then(({ items }) => ({name, items}))
+            items.push(await (
+              crawl(linkURL, opts, Object.assign({}, internals))
+                .then(({ items }) => ({name, items}))
+            ))
           } else {
             // It's a file!
 
@@ -107,14 +108,15 @@ function crawl(absURL, opts = {}, internals = {}) {
               !(extensions.includes(path.extname(href)))
             ) {
               verboseLog("[Ignored] Bad extension: " + linkURL)
-
-              return false
+              continue
             }
 
             verboseLog("[File] " + linkURL)
-            return Promise.resolve({name, downloaderArg: linkURL})
+            items.push({name, downloaderArg: linkURL})
           }
-        }).filter(Boolean)).then(items => ({items}))
+        }
+
+        return {items}
       }),
 
       err => {