diff options
author | Florrie <towerofni@gmail.com> | 2017-08-03 03:34:57 +0400 |
---|---|---|
committer | Florrie <towerofni@gmail.com> | 2017-08-03 03:35:05 +0400 |
commit | 3b6b9425b84f624b4cca9c3e8123e7f5554aed51 (patch) | |
tree | 9bad61a2b6cd24696648a8793fb5781968ef1069 | |
parent | 63ee18f61991cfcd10e42871d57eccd53fa51828 (diff) |
Kill '/' when at the end of a link name in crawl-http
-rwxr-xr-x | src/crawl-http.js | 10 |
1 file changed, 9 insertions, 1 deletion
```diff
diff --git a/src/crawl-http.js b/src/crawl-http.js
index 76f3941..ad3b776 100755
--- a/src/crawl-http.js
+++ b/src/crawl-http.js
@@ -47,7 +47,15 @@ function crawl(absURL, opts = {}, internals = {}) {
         const links = getHTMLLinks(text)
 
         return Promise.all(links.map(link => {
-          const [ name, href ] = link
+          let [ name, href ] = link
+
+          // If the name (that's the content inside of <a>..</a>) ends with a
+          // slash, that's probably just an artifact of a directory lister;
+          // not actually part of the intended content. So we remove it!
+          if (name.endsWith('/')) {
+            name = name.slice(0, -1)
+          }
+
           const urlObj = new url.URL(href, absURL)
           const linkURL = url.format(urlObj)
 
```