« get me outta code hell

HTTP crawler stuff - mtui - Music Text User Interface - user-friendly command line music player
about summary refs log tree commit diff
diff options
context:
space:
mode:
author: Florrie <towerofnix@gmail.com>, 2018-12-22 13:45:33 -0400
committer: Florrie <towerofnix@gmail.com>, 2018-12-23 00:51:59 -0400
commit: b0f256ea0352fd86f40c6e4bd18bee47c36c320b (patch)
tree: c14f7ed2269611367da15a7be252b5075bf5279b
parent: 770f7ce6f89f7cfc0d1f8f5d279a0649caa1a45a (diff)
HTTP crawler stuff
Basically, support for cors-anywhere.

(Cherry-picked from web-mtui branch)
-rw-r--r-- crawlers.js 14
1 file changed, 11 insertions, 3 deletions
diff --git a/crawlers.js b/crawlers.js
index feeedf2..c12948f 100644
--- a/crawlers.js
+++ b/crawlers.js
@@ -86,7 +86,15 @@ function crawlHTTP(absURL, opts = {}, internals = {}) {
 
           name = name.trim()
 
-          const urlObj = new url.URL(href, absURL + '/')
+          let base
+          if (path.extname(absURL)) {
+            base = path.dirname(absURL) + '/'
+            console.log('extname:', path.extname(absURL), 'so base:', base)
+          } else {
+            base = absURL
+          }
+
+          const urlObj = new url.URL(href, base)
           const linkURL = url.format(urlObj)
 
           if (internals.allURLs.includes(linkURL)) {
@@ -115,9 +123,9 @@ function crawlHTTP(absURL, opts = {}, internals = {}) {
               break sameDir
             }
 
-            const relative = path.relative(absURLObj.pathname, urlObj.pathname)
+            const relative = path.relative((new url.URL(base)).pathname, urlObj.pathname)
             if (relative.startsWith('..') || path.isAbsolute(relative)) {
-              verboseLog("[Ignored] Outside of parent directory: " + linkURL)
+              verboseLog("[Ignored] Outside of parent directory: " + linkURL + "\n-- relative: " + relative + "\n-- to base: " + base)
               continue
             }
           }