diff options
Diffstat (limited to 'crawlers.js')
-rw-r--r-- | crawlers.js | 14 |
1 file changed, 11 insertions, 3 deletions
diff --git a/crawlers.js b/crawlers.js index 4c96c85..3a1436d 100644 --- a/crawlers.js +++ b/crawlers.js @@ -75,7 +75,15 @@ function crawlHTTP(absURL, opts = {}, internals = {}) { name = name.trim() - const urlObj = new URL(href, absURL + '/') + let base + if (path.extname(absURL)) { + base = path.dirname(absURL) + '/' + console.log('extname:', path.extname(absURL), 'so base:', base) + } else { + base = absURL + } + + const urlObj = new URL(href, base) const linkURL = urlObj.toString() if (internals.allURLs.includes(linkURL)) { @@ -104,9 +112,9 @@ function crawlHTTP(absURL, opts = {}, internals = {}) { break sameDir } - const relative = path.relative(absURLObj.pathname, urlObj.pathname) + const relative = path.relative((new URL(base)).pathname, urlObj.pathname) if (relative.startsWith('..') || path.isAbsolute(relative)) { - verboseLog("[Ignored] Outside of parent directory: " + linkURL) + verboseLog("[Ignored] Outside of parent directory: " + linkURL + "\n-- relative: " + relative + "\n-- to base: " + base) continue } } |