diff options
-rw-r--r-- | crawlers.js | 14 |
1 files changed, 11 insertions, 3 deletions
diff --git a/crawlers.js b/crawlers.js index feeedf2..c12948f 100644 --- a/crawlers.js +++ b/crawlers.js @@ -86,7 +86,15 @@ function crawlHTTP(absURL, opts = {}, internals = {}) { name = name.trim() - const urlObj = new url.URL(href, absURL + '/') + let base + if (path.extname(absURL)) { + base = path.dirname(absURL) + '/' + console.log('extname:', path.extname(absURL), 'so base:', base) + } else { + base = absURL + } + + const urlObj = new url.URL(href, base) const linkURL = url.format(urlObj) if (internals.allURLs.includes(linkURL)) { @@ -115,9 +123,9 @@ function crawlHTTP(absURL, opts = {}, internals = {}) { break sameDir } - const relative = path.relative(absURLObj.pathname, urlObj.pathname) + const relative = path.relative((new url.URL(base)).pathname, urlObj.pathname) if (relative.startsWith('..') || path.isAbsolute(relative)) { - verboseLog("[Ignored] Outside of parent directory: " + linkURL) + verboseLog("[Ignored] Outside of parent directory: " + linkURL + "\n-- relative: " + relative + "\n-- to base: " + base) continue } } |