Pass YouTube playlist URLs directly to mtui

This also works for any other crawler argument: mtui will (try to) guess which crawler you want to use automatically. Handy!
author: Florrie <towerofnix@gmail.com> 2018-06-26 12:01:43 -0300
committer: Florrie <towerofnix@gmail.com> 2018-06-26 12:01:45 -0300
commit: 992911a021e2f2cccfbc63e554a3f34bd997cd91 (patch)
tree: d0cfdce2b2a5b47b699befc830f8c0b204d19be5 /crawlers.js
parent: 84ad73f638fe0b03e173b8ffb545d8edb43acc3c (diff)
1 files changed, 109 insertions, 17 deletions
diff --git a/crawlers.js b/crawlers.js
index 82ef78e..caf3c0e 100644
--- a/crawlers.js
+++ b/crawlers.js
@@ -11,6 +11,15 @@ const { promisify } = require('util')
 const readDir = promisify(fs.readdir)
 const stat = promisify(fs.stat)
 
+// Each value is a function with these additional properties:
+// * crawlerName: The name of the crawler, such as "crawl-http". Used by
+//   getCrawlerByName.
+// * isAppropriateForArg: A function returning whether an argument is valid for
+//   the crawler. For example, crawlHTTP.isAppropriateForArg returns whether or
+//   not the passed argument is a valid URL of the HTTP/HTTPS protocol. Used by
+//   getAllCrawlersForArg.
+const allCrawlers = {}
+
 function sortIgnoreCase(sortFunction) {
   return function(a, b) {
     return sortFunction(a.toLowerCase(), b.toLowerCase())
@@ -173,6 +182,39 @@ function crawlHTTP(absURL, opts = {}, internals = {}) {
     })
 }
 
+crawlHTTP.crawlerName = 'crawl-http'
+
+crawlHTTP.isAppropriateForArg = function(arg) {
+  // It is only used for HTTP(S) servers:
+  if (!(arg.startsWith('http://') || arg.startsWith('https://'))) {
+    return false
+  }
+
+  // It will definitely only work for valid URLs:
+  let url
+  try {
+    url = new URL(arg)
+  } catch (error) {
+    return false
+  }
+
+  // If the URL ends with a .json, it is probably meant to be used for a direct
+  // playlist download, not to be crawled.
+  if (path.extname(url.pathname) === '.json') {
+    return false
+  }
+
+  // Just to avoid conflict with crawl-youtube, assume crawl-http is not used
+  // for URLs on YouTube:
+  if (crawlYouTube.isAppropriateForArg(arg)) {
+    return false
+  }
+
+  return true
+}
+
+allCrawlers.crawlHTTP = crawlHTTP
+
 function getHTMLLinks(text) {
   // Never parse HTML with a regex!
   const $ = cheerio.load(text)
@@ -183,8 +225,6 @@ function getHTMLLinks(text) {
   })
 }
 
-
-
 function crawlLocal(dirPath, extensions = [
   'ogg', 'oga',
   'wav', 'mp3', 'mp4', 'm4a', 'aac',
@@ -238,6 +278,28 @@ function crawlLocal(dirPath, extensions = [
     .then(filteredItems => ({items: filteredItems}))
 }
 
+crawlLocal.crawlerName = 'crawl-local'
+
+crawlLocal.isAppropriateForArg = function(arg) {
+  // When the passed argument is a valid URL, it is only used for file://
+  // URLs:
+  try {
+    const url = new URL(arg)
+    if (url.protocol !== 'file:') {
+      return false
+    }
+  } catch (error) {}
+
+  // If the passed argument ends with .json, it is probably not a directory.
+  if (path.extname(arg) === '.json') {
+    return false
+  }
+
+  return true
+}
+
+allCrawlers.crawlLocal = crawlLocal
+
 async function crawlYouTube(url) {
   const ytdl = spawn('youtube-dl', [
     '-j', // Output as JSON
@@ -266,23 +328,53 @@ async function crawlYouTube(url) {
   }
 }
 
+crawlYouTube.crawlerName = 'crawl-youtube'
+
+crawlYouTube.isAppropriateForArg = function(arg) {
+  // It is definitely not used for arguments that are not URLs:
+  let url
+  try {
+    url = new URL(arg)
+  } catch (error) {
+    return false
+  }
+
+  // It is only used for URLs on the YouTube domain:
+  if (!(url.hostname === 'youtube.com' || url.hostname === 'www.youtube.com')) {
+    return false
+  }
+
+  // It is only used for playlist pages:
+  if (url.pathname !== '/playlist') {
+    return false
+  }
+
+  return true
+}
+
+allCrawlers.crawlYouTube = crawlYouTube
+
 async function openFile(input) {
   return JSON.parse(await downloadPlaylistFromOptionValue(input))
 }
 
-module.exports = {
-  crawlHTTP,
-  crawlLocal,
-  crawlYouTube,
-  openFile,
-
-  getCrawlerByName: function(name) {
-    switch (name) {
-      case 'crawl-http': return crawlHTTP
-      case 'crawl-local': return crawlLocal
-      case 'crawl-youtube': return crawlYouTube
-      case 'open-file': return openFile
-      default: return null
-    }
-  }
+openFile.crawlerName = 'open-file'
+
+openFile.isAppropriateForArg = function(arg) {
+  // It is only valid for arguments that end with .json:
+  return path.extname(arg) === '.json'
+}
+
+allCrawlers.openFile = openFile
+
+// Actual module.exports stuff:
+
+Object.assign(module.exports, allCrawlers)
+
+module.exports.getCrawlerByName = function(name) {
+  return Object.values(allCrawlers).find(fn => fn.crawlerName === name)
+}
+
+module.exports.getAllCrawlersForArg = function(arg) {
+  return Object.values(allCrawlers).filter(fn => fn.isAppropriateForArg(arg))
 }
author	Florrie <towerofnix@gmail.com>	2018-06-26 12:01:43 -0300
committer	Florrie <towerofnix@gmail.com>	2018-06-26 12:01:45 -0300
commit	992911a021e2f2cccfbc63e554a3f34bd997cd91 (patch)
tree	d0cfdce2b2a5b47b699befc830f8c0b204d19be5 /crawlers.js
parent	84ad73f638fe0b03e173b8ffb545d8edb43acc3c (diff)