diff options
-rwxr-xr-x | src/crawl-http.js | 10 |
1 file changed, 9 insertions, 1 deletion
diff --git a/src/crawl-http.js b/src/crawl-http.js
index 76f3941..ad3b776 100755
--- a/src/crawl-http.js
+++ b/src/crawl-http.js
@@ -47,7 +47,15 @@ function crawl(absURL, opts = {}, internals = {}) {
       const links = getHTMLLinks(text)
 
       return Promise.all(links.map(link => {
-        const [ name, href ] = link
+        let [ name, href ] = link
+
+        // If the name (that's the content inside of <a>..</a>) ends with a
+        // slash, that's probably just an artifact of a directory lister;
+        // not actually part of the intended content. So we remove it!
+        if (name.endsWith('/')) {
+          name = name.slice(0, -1)
+        }
+
         const urlObj = new url.URL(href, absURL)
         const linkURL = url.format(urlObj)
 