diff options
author | Florrie <towerofnix@gmail.com> | 2018-12-22 13:45:33 -0400 |
---|---|---|
committer | Florrie <towerofnix@gmail.com> | 2018-12-23 00:51:59 -0400 |
commit | b0f256ea0352fd86f40c6e4bd18bee47c36c320b (patch) | |
tree | c14f7ed2269611367da15a7be252b5075bf5279b | |
parent | 770f7ce6f89f7cfc0d1f8f5d279a0649caa1a45a (diff) |
HTTP crawler stuff
Basically, support for cors-anywhere. (Cherry-picked from web-mtui branch)
-rw-r--r-- | crawlers.js | 14 |
1 file changed, 11 insertions, 3 deletions
diff --git a/crawlers.js b/crawlers.js
index feeedf2..c12948f 100644
--- a/crawlers.js
+++ b/crawlers.js
@@ -86,7 +86,15 @@ function crawlHTTP(absURL, opts = {}, internals = {}) {
 
           name = name.trim()
 
-          const urlObj = new url.URL(href, absURL + '/')
+          let base
+          if (path.extname(absURL)) {
+            base = path.dirname(absURL) + '/'
+            console.log('extname:', path.extname(absURL), 'so base:', base)
+          } else {
+            base = absURL
+          }
+
+          const urlObj = new url.URL(href, base)
           const linkURL = url.format(urlObj)
 
           if (internals.allURLs.includes(linkURL)) {
@@ -115,9 +123,9 @@ function crawlHTTP(absURL, opts = {}, internals = {}) {
                 break sameDir
               }
 
-              const relative = path.relative(absURLObj.pathname, urlObj.pathname)
+              const relative = path.relative((new url.URL(base)).pathname, urlObj.pathname)
               if (relative.startsWith('..') || path.isAbsolute(relative)) {
-                verboseLog("[Ignored] Outside of parent directory: " + linkURL)
+                verboseLog("[Ignored] Outside of parent directory: " + linkURL + "\n-- relative: " + relative + "\n-- to base: " + base)
                 continue
               }
             }