« get me outta code hell

HTTP crawler stuff - mtui - Music Text User Interface - user-friendly command line music player
about summary refs log tree commit diff
diff options
context:
space:
mode:
author Florrie <towerofnix@gmail.com> 2018-12-22 13:45:33 -0400
committer Florrie <towerofnix@gmail.com> 2018-12-22 13:45:33 -0400
commit 72eff553df4fd4496172cf8d26b4585ac9f34b49 (patch)
tree f61837d6cb5ad537c06256b70ce27508d7078528
parent eb13543e95052b2d1a0c6cd3376c9655579527bc (diff)
HTTP crawler stuff
Basically, support for cors-anywhere.
-rw-r--r-- crawlers.js 14
1 files changed, 11 insertions, 3 deletions
diff --git a/crawlers.js b/crawlers.js
index 4c96c85..3a1436d 100644
--- a/crawlers.js
+++ b/crawlers.js
@@ -75,7 +75,15 @@ function crawlHTTP(absURL, opts = {}, internals = {}) {
 
           name = name.trim()
 
-          const urlObj = new URL(href, absURL + '/')
+          let base
+          if (path.extname(absURL)) {
+            base = path.dirname(absURL) + '/'
+            console.log('extname:', path.extname(absURL), 'so base:', base)
+          } else {
+            base = absURL
+          }
+
+          const urlObj = new URL(href, base)
           const linkURL = urlObj.toString()
 
           if (internals.allURLs.includes(linkURL)) {
@@ -104,9 +112,9 @@ function crawlHTTP(absURL, opts = {}, internals = {}) {
               break sameDir
             }
 
-            const relative = path.relative(absURLObj.pathname, urlObj.pathname)
+            const relative = path.relative((new URL(base)).pathname, urlObj.pathname)
             if (relative.startsWith('..') || path.isAbsolute(relative)) {
-              verboseLog("[Ignored] Outside of parent directory: " + linkURL)
+              verboseLog("[Ignored] Outside of parent directory: " + linkURL + "\n-- relative: " + relative + "\n-- to base: " + base)
               continue
             }
           }