« get me outta code hell

Pass YouTube playlist URLs directly to mtui - mtui - Music Text User Interface - user-friendly command line music player
about summary refs log tree commit diff
path: root/crawlers.js
diff options
context:
space:
mode:
author: Florrie <towerofnix@gmail.com>, 2018-06-26 12:01:43 -0300
committer: Florrie <towerofnix@gmail.com>, 2018-06-26 12:01:45 -0300
commit: 992911a021e2f2cccfbc63e554a3f34bd997cd91 (patch)
tree: d0cfdce2b2a5b47b699befc830f8c0b204d19be5 /crawlers.js
parent: 84ad73f638fe0b03e173b8ffb545d8edb43acc3c (diff)
Pass YouTube playlist URLs directly to mtui
Or any other crawler argument, and it'll (try to) guess which crawler
you want to use automatically. Handy!
Diffstat (limited to 'crawlers.js')
-rw-r--r-- crawlers.js 126
1 files changed, 109 insertions, 17 deletions
diff --git a/crawlers.js b/crawlers.js
index 82ef78e..caf3c0e 100644
--- a/crawlers.js
+++ b/crawlers.js
@@ -11,6 +11,15 @@ const { promisify } = require('util')
 const readDir = promisify(fs.readdir)
 const stat = promisify(fs.stat)
 
+// Each value is a function with these additional properties:
+// * crawlerName: The name of the crawler, such as "crawl-http". Used by
+//   getCrawlerByName.
+// * isAppropriateForArg: A function returning whether an argument is valid for
+//   the crawler. For example, crawlHTTP.isAppropriateForArg returns whether or
+//   not the passed argument is a valid URL of the HTTP/HTTPS protocol. Used by
+//   getAllCrawlersForArg.
+const allCrawlers = {}
+
 function sortIgnoreCase(sortFunction) {
   return function(a, b) {
     return sortFunction(a.toLowerCase(), b.toLowerCase())
@@ -173,6 +182,39 @@ function crawlHTTP(absURL, opts = {}, internals = {}) {
     })
 }
 
+crawlHTTP.crawlerName = 'crawl-http'
+
+crawlHTTP.isAppropriateForArg = function(arg) {
+  // It is only used for HTTP(S) servers:
+  if (!(arg.startsWith('http://') || arg.startsWith('https://'))) {
+    return false
+  }
+
+  // It will definitely only work for valid URLs:
+  let url
+  try {
+    url = new URL(arg)
+  } catch (error) {
+    return false
+  }
+
+  // If the URL ends with a .json, it is probably meant to be used for a direct
+  // playlist download, not to be crawled.
+  if (path.extname(url.pathname) === '.json') {
+    return false
+  }
+
+  // Just to avoid conflict with crawl-youtube, assume crawl-http is not used
+  // for URLs on YouTube:
+  if (crawlYouTube.isAppropriateForArg(arg)) {
+    return false
+  }
+
+  return true
+}
+
+allCrawlers.crawlHTTP = crawlHTTP
+
 function getHTMLLinks(text) {
   // Never parse HTML with a regex!
   const $ = cheerio.load(text)
@@ -183,8 +225,6 @@ function getHTMLLinks(text) {
   })
 }
 
-
-
 function crawlLocal(dirPath, extensions = [
   'ogg', 'oga',
   'wav', 'mp3', 'mp4', 'm4a', 'aac',
@@ -238,6 +278,28 @@ function crawlLocal(dirPath, extensions = [
     .then(filteredItems => ({items: filteredItems}))
 }
 
+crawlLocal.crawlerName = 'crawl-local'
+
+crawlLocal.isAppropriateForArg = function(arg) {
+  // When the passed argument is a valid URL, it is only used for file://
+  // URLs:
+  try {
+    const url = new URL(arg)
+    if (url.protocol !== 'file:') {
+      return false
+    }
+  } catch (error) {}
+
+  // If the passed argument ends with .json, it is probably not a directory.
+  if (path.extname(arg) === '.json') {
+    return false
+  }
+
+  return true
+}
+
+allCrawlers.crawlLocal = crawlLocal
+
 async function crawlYouTube(url) {
   const ytdl = spawn('youtube-dl', [
     '-j', // Output as JSON
@@ -266,23 +328,53 @@ async function crawlYouTube(url) {
   }
 }
 
+crawlYouTube.crawlerName = 'crawl-youtube'
+
+crawlYouTube.isAppropriateForArg = function(arg) {
+  // It is definitely not used for arguments that are not URLs:
+  let url
+  try {
+    url = new URL(arg)
+  } catch (error) {
+    return false
+  }
+
+  // It is only used for URLs on the YouTube domain:
+  if (!(url.hostname === 'youtube.com' || url.hostname === 'www.youtube.com')) {
+    return false
+  }
+
+  // It is only used for playlist pages:
+  if (url.pathname !== '/playlist') {
+    return false
+  }
+
+  return true
+}
+
+allCrawlers.crawlYouTube = crawlYouTube
+
 async function openFile(input) {
   return JSON.parse(await downloadPlaylistFromOptionValue(input))
 }
 
-module.exports = {
-  crawlHTTP,
-  crawlLocal,
-  crawlYouTube,
-  openFile,
-
-  getCrawlerByName: function(name) {
-    switch (name) {
-      case 'crawl-http': return crawlHTTP
-      case 'crawl-local': return crawlLocal
-      case 'crawl-youtube': return crawlYouTube
-      case 'open-file': return openFile
-      default: return null
-    }
-  }
+openFile.crawlerName = 'open-file'
+
+openFile.isAppropriateForArg = function(arg) {
+  // It is only valid for arguments that end with .json:
+  return path.extname(arg) === '.json'
+}
+
+allCrawlers.openFile = openFile
+
+// Actual module.exports stuff:
+
+Object.assign(module.exports, allCrawlers)
+
+module.exports.getCrawlerByName = function(name) {
+  return Object.values(allCrawlers).find(fn => fn.crawlerName === name)
+}
+
+module.exports.getAllCrawlersForArg = function(arg) {
+  return Object.values(allCrawlers).filter(fn => fn.isAppropriateForArg(arg))
 }