diff options
| author | (quasar) nebula <qznebula@protonmail.com> | 2026-04-12 12:14:23 -0300 |
|---|---|---|
| committer | (quasar) nebula <qznebula@protonmail.com> | 2026-04-12 12:15:04 -0300 |
| commit | 0d2481ed8abdb084f5c10984181c2a7355d5208a (patch) | |
| tree | f039a8ef4def20bb8ac9202bdc85a94dfaec3dbb /src/reformat-urls.js | |
| parent | 5fcd8ce38402c6623b57a5dc846c9786a24644f1 (diff) | |
upd8, etc: curated url validation, tidying modes, --format-urls
Diffstat (limited to 'src/reformat-urls.js')
| -rw-r--r-- | src/reformat-urls.js | 163 |
1 files changed, 163 insertions, 0 deletions
// Find-replace calls analogous to isCuratedURL in #validators.
// This can't catch everything, but should automate the greater bulk of it.

import * as path from 'node:path';

import {replaceInFile} from 'replace-in-file';

import {colors, logInfo} from '#cli';
import {escapeRegex, re} from '#sugar';

// Joins literal strings (e.g. domain names) into a regex alternation,
// escaping each one so characters like "." only match themselves.
// The result is not wrapped in a group - callers add their own.
function or(options) {
  // Arrow wrapper so escapeRegex only ever receives the string itself,
  // not the index and array that Array.prototype.map also passes along.
  return options.map(option => escapeRegex(option)).join('|');
}

// Builds a [message, find, replace] rule which upgrades "- http://" to
// "- https://" at the start of a line, but only when it's immediately
// followed by one of the given domains and a slash.
//
// `domain` is expected to be an already-escaped alternation, as returned
// by or(). The domain sits in a lookahead, so only the scheme is replaced.
//
// NOTE(review): presumably re() joins the listed parts into one RegExp
// with the given flags - confirm against #sugar.
function https(namespace, domain) {
  return [
    `${namespace}: http:// to https://`,

    re('gmi', [
      '^- http://',
      `(?=(?:${domain})/)`,
    ]),

    '- https://',
  ];
}


// Rules are evaluated top to bottom, in order,
// so each rule can build off previous ones.
const findreplace = [];

// Twitter

// Includes the bare twitter.com domain so that http://twitter.com/ links
// get upgraded too, not just the variants which are rewritten below.
const twitterDomains =
  or([
    'www.twitter.com',
    'twitter.com',
    'x.com',
  ]);

findreplace.push(https('twitter', twitterDomains));

findreplace.push([
  `twitter: www.twitter.com -> twitter.com`,
  /^- https:\/\/www\.twitter\.com\//gmi,
  '- https://twitter.com/',
]);

findreplace.push([
  `twitter: x.com -> twitter.com`,
  /^- https:\/\/x\.com\//gmi,
  '- https://twitter.com/',
]);

// YouTube

const youtubeDomains =
  or([
    'www.youtube.com',
    'youtube.com',
    'youtu.be',
  ]);

findreplace.push(https('youtube', youtubeDomains));

// Only trims an si parameter which is the very last thing on the line;
// everything captured before it is kept as-is.
findreplace.push([
  `youtube: trim ?si search parameter`,

  re('gmi', [
    '^(',
    '- https://',
    `(?:${youtubeDomains})`,
    '/.*',
    ')',

    '[&?]si=',
    '[a-z0-9_-]+',
    '$',
  ]),

  '$1',
]);

// The two youtu.be rules only match an 11-character video ID that is
// followed by nothing at all, or by a numeric ?t= timestamp alone.
findreplace.push([
  `youtube: youtu.be -> www.youtube.com/watch?v=___`,
  /^- https:\/\/youtu\.be\/([a-z0-9_-]{11})$/gmi,
  '- https://www.youtube.com/watch?v=$1',
]);

findreplace.push([
  `youtube: youtu.be -> www.youtube.com/watch?v=___&t=___`,
  /^- https:\/\/youtu\.be\/([a-z0-9_-]{11})\?t=(\d+)$/gmi,
  '- https://www.youtube.com/watch?v=$1&t=$2',
]);
findreplace.push([
  `youtube: youtube.com -> www.youtube.com`,
  /^- https:\/\/youtube\.com\//gmi,
  '- https://www.youtube.com/',
]);


// Applies every rule in `findreplace`, in order, to all .yaml files
// found anywhere under dataPath, rewriting the files in place.
//
// Options:
//   dataPath           - required; directory to search. Throws an Error
//                        if it's missing or otherwise falsy.
//   showChangedFiles   - log each updated file (relative to dataPath)
//                        along with the rules that changed it.
//   showSatisfiedRules - log each rule that didn't change any file.
//
// Returns a Map from file path (as reported by replaceInFile) to the
// list of rule messages that changed that file, ordered by file path.
//
// If a rule fails partway through, files changed by earlier rules are
// still reported (when showChangedFiles is set) before the error
// propagates to the caller.
export async function reformatCuratedURLs({
  dataPath,
  showChangedFiles = true,
  showSatisfiedRules = true,
}) {
  if (!dataPath) {
    throw new Error(`Expected dataPath`);
  }

  const changedFiles = new Map();
  let sortedEntries = [];

  try {
    // Rules run one at a time, since later rules build off the
    // results of earlier ones.
    for (const [message, find, replace] of findreplace) {
      const results =
        await replaceInFile({
          files: dataPath + '/**/*.yaml',
          from: find,
          to: replace,
        });

      let anyChangedForThisRule = false;

      for (const result of results) {
        if (!result.hasChanged) {
          continue;
        }

        anyChangedForThisRule = true;

        if (changedFiles.has(result.file)) {
          changedFiles.get(result.file).push(message);
        } else {
          changedFiles.set(result.file, [message]);
        }
      }

      if (showSatisfiedRules && !anyChangedForThisRule) {
        logInfo`Already satisfied: ${message}`;
      }
    }
  } finally {
    // Runs on failure too, so partial progress gets reported.
    // Deliberately doesn't return from here - that would override
    // the completion of the try block, including a thrown error.
    sortedEntries =
      Array.from(changedFiles.entries())
        .sort(([fileA], [fileB]) =>
          fileA < fileB ? -1 : fileA > fileB ? +1 : 0);

    if (showChangedFiles) {
      for (const [file, messages] of sortedEntries) {
        logInfo`Updated: ${path.relative(dataPath, file)}`;
        for (const message of messages) {
          console.log(colors.dim(` - ${message}`));
        }
      }
    }
  }

  return new Map(sortedEntries);
}