« get me outta code hell

Various improvements to crawl-http - http-music - Command-line music player + utils (not a server!)
about summary refs log tree commit diff
path: root/src
diff options
context:
space:
mode:
author: Florrie <towerofnix@gmail.com>, 2018-01-27 00:23:21 -0400
committer: Florrie <towerofnix@gmail.com>, 2018-01-27 00:35:06 -0400
commit: 64bcc2930392d70437dc5bc8b2f078840d8998a9 (patch)
tree: 7e5250e6859a112c85a264c313a9e57818db7c21 /src
parent: a72500509a5a334bd8f0f7d490a4833c03201966 (diff)
Various improvements to crawl-http
Names are now trimmed. You shouldn't see " Vim!" anymore - just "Vim!".

.MOD files are considered to be music.

The crawler will try to avoid going out of whatever directory was passed
to it.
Diffstat (limited to 'src')
-rwxr-xr-x  src/crawl-http.js | 15
1 file changed, 13 insertions, 2 deletions
diff --git a/src/crawl-http.js b/src/crawl-http.js
index 9c7608e..ae38ca4 100755
--- a/src/crawl-http.js
+++ b/src/crawl-http.js
@@ -19,9 +19,10 @@ function crawl(absURL, opts = {}, internals = {}) {
     maxAttempts = 5,
 
     keepSeparateHosts = false,
+    stayInSameDirectory = true,
 
     keepAnyFileType = false,
-    fileTypes = ['wav', 'ogg', 'oga', 'mp3', 'mp4', 'm4a', 'mov', 'mpga'],
+    fileTypes = ['wav', 'ogg', 'oga', 'mp3', 'mp4', 'm4a', 'mov', 'mpga', 'mod'],
 
     filterRegex = null
   } = opts
@@ -56,7 +57,9 @@ function crawl(absURL, opts = {}, internals = {}) {
             name = name.slice(0, -1)
           }
 
-          const urlObj = new url.URL(href, absURL)
+          name = name.trim()
+
+          const urlObj = new url.URL(href, absURL + '/')
           const linkURL = url.format(urlObj)
 
           if (internals.allURLs.includes(linkURL)) {
@@ -79,6 +82,14 @@ function crawl(absURL, opts = {}, internals = {}) {
             return false
           }
 
+          if (stayInSameDirectory) {
+            const relative = path.relative(absURLObj.pathname, urlObj.pathname)
+            if (relative.startsWith('..') || path.isAbsolute(relative)) {
+              verboseLog("[Ignored] Outside of parent directory: " + linkURL)
+              return false
+            }
+          }
+
           if (href.endsWith('/')) {
             // It's a directory!