From 0d2481ed8abdb084f5c10984181c2a7355d5208a Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Sun, 12 Apr 2026 12:14:23 -0300 Subject: upd8, etc: curated url validation, tidying modes, --format-urls --- src/cli.js | 21 +- src/data/composite/wiki-properties/urls.js | 4 +- src/data/things/MusicVideo.js | 6 +- src/reformat-urls.js | 163 +++++++++++++++ src/upd8.js | 322 +++++++++++++++++++++-------- src/validators.js | 58 ++++++ src/write/build-modes/index.js | 1 - src/write/build-modes/sort.js | 76 ------- src/write/tidy-modes/format-urls.js | 34 +++ src/write/tidy-modes/index.js | 2 + src/write/tidy-modes/sort.js | 61 ++++++ 11 files changed, 574 insertions(+), 174 deletions(-) create mode 100644 src/reformat-urls.js delete mode 100644 src/write/build-modes/sort.js create mode 100644 src/write/tidy-modes/format-urls.js create mode 100644 src/write/tidy-modes/index.js create mode 100644 src/write/tidy-modes/sort.js (limited to 'src') diff --git a/src/cli.js b/src/cli.js index ec72a625..52ac9f9c 100644 --- a/src/cli.js +++ b/src/cli.js @@ -231,18 +231,29 @@ export function showHelpForOptions({ options, indentWrap, sort = entries => entries, + silentIfNoOptions = false, }) { - if (heading) { - console.log(colors.bright(heading)); - } - const sortedOptions = sort( Object.entries(options) .map(([name, descriptor]) => ({name, descriptor}))); + if (!sortedOptions.length && silentIfNoOptions) return; + + if (heading) { + console.log(colors.bright(heading)); + } + if (!sortedOptions.length) { - console.log(`(No options available)`) + if (heading) { + console.log(``); + console.log(` (No options available)`); + console.log(``); + } else { + console.log(`(No options available)`); + } + + return; } let justInsertedPaddingLine = false; diff --git a/src/data/composite/wiki-properties/urls.js b/src/data/composite/wiki-properties/urls.js index 3160a0bf..04ccf689 100644 --- a/src/data/composite/wiki-properties/urls.js +++ b/src/data/composite/wiki-properties/urls.js @@ -1,14 
+1,14 @@ // A list of URLs! This will always be present on the data object, even if set // to an empty array or null. -import {isURL, validateArrayItems} from '#validators'; +import {isCuratedURL, validateArrayItems} from '#validators'; // TODO: Not templateCompositeFrom. export default function() { return { flags: {update: true, expose: true}, - update: {validate: validateArrayItems(isURL)}, + update: {validate: validateArrayItems(isCuratedURL)}, expose: {transform: value => value ?? []}, }; } diff --git a/src/data/things/MusicVideo.js b/src/data/things/MusicVideo.js index 7ebbba37..16dffa3b 100644 --- a/src/data/things/MusicVideo.js +++ b/src/data/things/MusicVideo.js @@ -4,7 +4,7 @@ import {colors} from '#cli'; import {input, V} from '#composite'; import {empty} from '#sugar'; import Thing from '#thing'; -import {is, isDate, isStringNonEmpty, isURL, validateArrayItems} +import {is, isCuratedURL, isDate, isStringNonEmpty, validateArrayItems} from '#validators'; import {parseContributors, parseDate} from '#yaml'; @@ -70,7 +70,7 @@ export class MusicVideo extends Thing { flags: {update: true, expose: true}, update: { - validate: isURL, + validate: isCuratedURL, }, expose: { @@ -86,7 +86,7 @@ export class MusicVideo extends Thing { flags: {update: true, expose: true}, update: { - validate: validateArrayItems(isURL), + validate: validateArrayItems(isCuratedURL), }, expose: { diff --git a/src/reformat-urls.js b/src/reformat-urls.js new file mode 100644 index 00000000..a42d7a4a --- /dev/null +++ b/src/reformat-urls.js @@ -0,0 +1,163 @@ +// Find-replace calls analogous to isCuratedURL in #validators. +// This can't catch everything, but should automate the greater bulk of it. 
+ +import * as path from 'node:path'; + +import {replaceInFile} from 'replace-in-file'; + +import {colors, logInfo} from '#cli'; +import {escapeRegex, re} from '#sugar'; + +function or(options) { + return options.map(escapeRegex).join('|'); +} + +function https(namespace, domain) { + return [ + `${namespace}: http:// to https://`, + + re('gmi', [ + '^- http://', + `(?=(?:` + domain + ')/)', + ]), + + '- https://', + ]; +} + + +// Rules are evaluated top to bottom, in order, +// so each rule can build off previous ones. +const findreplace = []; + +// Twitter + +const twitterDomains = + or([ + 'www.twitter.com', + 'x.com', + ]); + +findreplace.push(https('twitter', twitterDomains)); + +findreplace.push([ + `twitter: www.twitter.com -> twitter.com`, + /^- https:\/\/www\.twitter\.com\//gmi, + '- https://twitter.com/', +]); + +findreplace.push([ + `twitter: x.com -> twitter.com`, + /^- https:\/\/x\.com\//gmi, + '- https://twitter.com/', +]); + +// YouTube + +const youtubeDomains = + or([ + 'www.youtube.com', + 'youtube.com', + 'youtu.be', + ]); + +findreplace.push(https('youtube', youtubeDomains)); + +findreplace.push([ + `youtube: trim ?si search parameter`, + + re('gmi', [ + '^(', + '- https://', + '(?:' + youtubeDomains + ')', + '\/.*', + ')', + + '[&?]si=', + '[a-z0-9_-]+', + '$', + ]), + + '$1', +]); + +findreplace.push([ + `youtube: youtu.be -> www.youtube.com/watch?v=___`, + /^- https:\/\/youtu\.be\/([a-z0-9_-]{11,11})$/gmi, + '- https://www.youtube.com/watch?v=$1' +]); + +findreplace.push([ + `youtube: youtu.be -> www.youtube.com/watch?v=___&t=___`, + /^- https:\/\/youtu\.be\/([a-z0-9_-]{11,11})\?t=(\d+)$/gmi, + '- https://www.youtube.com/watch?v=$1&t=$2', +]); + +findreplace.push([ + `youtube: youtube.com -> www.youtube.com`, + /^- https:\/\/youtube\.com\//gmi, + '- https://www.youtube.com/', +]); + + +export async function reformatCuratedURLs({ + dataPath, + showChangedFiles = true, + showSatisfiedRules = true, +}) { + if (!dataPath) { + throw new 
Error(`Expected dataPath`); + } + + let changedFiles = new Map(); + let errored = false; + let anyChanged = false; + + try { + for (const [message, find, replace] of findreplace) { + const options = { + files: dataPath + '/**/*.yaml', + from: find, + to: replace, + }; + + let anyChangedForThisRule = false; + for (const result of await replaceInFile(options)) { + if (result.hasChanged) { + anyChanged = true; + anyChangedForThisRule = true; + if (changedFiles.has(result.file)) { + changedFiles.get(result.file).push(message); + } else { + changedFiles.set(result.file, [message]); + } + } + } + + if (showSatisfiedRules && !anyChangedForThisRule) { + logInfo`Already satisfied: ${message}`; + } + } + + return changedFiles; + } catch (caughtError) { + errored = true; + throw caughtError; + } finally { + const entries = Array.from(changedFiles.entries()); + entries.sort((a, b) => a[0] < b[0] ? -1 : a[0] > b[0] ? +1 : 0); + + if (showChangedFiles) { + for (const [file, messages] of entries) { + logInfo`Updated: ${path.relative(dataPath, file)}`; + for (const message of messages) { + console.log(colors.dim(` - ${message}`)); + } + } + } + + if (!errored) { + return new Map(entries); + } + } +} diff --git a/src/upd8.js b/src/upd8.js index 2091e5ba..e9353007 100755 --- a/src/upd8.js +++ b/src/upd8.js @@ -54,6 +54,7 @@ import {bindReverse} from '#reverse'; import {writeSearchData} from '#search'; import {sortByName} from '#sort'; import thingConstructors from '#things'; +import {disableCuratedURLValidation} from '#validators'; import {identifyAllWebRoutes} from '#web-routes'; import { @@ -80,6 +81,7 @@ import { empty, filterMultipleArrays, indentWrap as unboundIndentWrap, + stitchArrays, withEntries, } from '#sugar'; @@ -114,6 +116,7 @@ import { import FileSizePreloader from './file-size-preloader.js'; import {listingSpec, listingTargetSpec} from './listing-spec.js'; import * as buildModes from './write/build-modes/index.js'; +import * as tidyModes from 
'./write/tidy-modes/index.js'; const __dirname = path.dirname(fileURLToPath(import.meta.url)); @@ -131,6 +134,7 @@ const STATUS_NOT_APPLICABLE = `not applicable`; const STATUS_STARTED_NOT_DONE = `started but not yet done`; const STATUS_DONE_CLEAN = `done without warnings`; const STATUS_FATAL_ERROR = `fatal error`; +const STATUS_INVALID_SIGNAL = `invalid exit signal`; const STATUS_HAS_WARNINGS = `has warnings`; const defaultStepStatus = {status: STATUS_NOT_STARTED, annotation: null}; @@ -199,10 +203,6 @@ async function main() { {...defaultStepStatus, name: `precache nearly all data`, for: ['build']}, - sortWikiDataSourceFiles: - {...defaultStepStatus, name: `apply sorting rules to wiki data files`, - for: ['build']}, - checkWikiDataSourceFileSorting: {...defaultStepStatus, name: `check sorting rules against wiki data files`}, @@ -251,6 +251,14 @@ async function main() { {...defaultStepStatus, name: `identify web routes`, for: ['build']}, + reformatCuratedURLs: + {...defaultStepStatus, name: `reformat curated URLs`, + for: ['build']}, + + sortWikiDataSourceFiles: + {...defaultStepStatus, name: `apply sorting rules to wiki data files`, + for: ['build']}, + performBuild: {...defaultStepStatus, name: `perform selected build mode`, for: ['build']}, @@ -272,32 +280,50 @@ async function main() { const defaultQueueSize = 500; - const buildModeFlagOptions = ( + const buildModeFlagOptions = withEntries(buildModes, entries => entries.map(([key, mode]) => [key, { help: mode.description, type: 'flag', - }]))); + }])); - const selectedBuildModeFlags = Object.keys( - await parseOptions(process.argv.slice(2), { - [parseOptions.handleUnknown]: () => {}, - ...buildModeFlagOptions, - })); + const selectedBuildModeFlags = + Object.keys( + await parseOptions(process.argv.slice(2), { + [parseOptions.handleUnknown]: () => {}, + ...buildModeFlagOptions, + })); - let selectedBuildModeFlag; - let sortInAdditionToBuild = false; + const tidyModeFlagOptions = + withEntries(tidyModes, entries => 
+ entries.map(([key, mode]) => [key, { + help: mode.description, + type: 'flag', + }])); + + const selectedTidyModeFlags = + Object.keys( + await parseOptions(process.argv.slice(2), { + [parseOptions.handleUnknown]: () => {}, + ...tidyModeFlagOptions, + })); - // As an exception, --sort can be combined with another build mode. - if (selectedBuildModeFlags.length >= 2 && selectedBuildModeFlags.includes('sort')) { - sortInAdditionToBuild = true; - selectedBuildModeFlags.splice(selectedBuildModeFlags.indexOf('sort'), 1); + if (selectedTidyModeFlags.includes('format-urls')) { + Object.assign(stepStatusSummary.reformatCuratedURLs, { + status: STATUS_NOT_STARTED, + annotation: `--format-urls provided`, + }); + } else { + Object.assign(stepStatusSummary.reformatCuratedURLs, { + status: STATUS_NOT_APPLICABLE, + annotation: `--format-urls not provided`, + }); } - if (sortInAdditionToBuild) { + if (selectedTidyModeFlags.includes('sort')) { Object.assign(stepStatusSummary.sortWikiDataSourceFiles, { status: STATUS_NOT_STARTED, - annotation: `--sort provided with another build mode`, + annotation: `--sort provided`, }); Object.assign(stepStatusSummary.checkWikiDataSourceFileSorting, { @@ -316,17 +342,15 @@ async function main() { }); } - if (empty(selectedBuildModeFlags)) { - // No build mode selected. This is not a valid state for building the wiki, - // but we want to let access to --help, so we'll show a message about what - // to do later. 
- selectedBuildModeFlag = null; - } else if (selectedBuildModeFlags.length > 1) { - logError`Building multiple modes (${selectedBuildModeFlags.join(', ')}) at once not supported.`; - logError`Please specify one build mode.`; - return false; - } else { - selectedBuildModeFlag = selectedBuildModeFlags[0]; + let selectedBuildModeFlag; + switch (selectedBuildModeFlags.length) { + case 0: selectedBuildModeFlag = null; break; + case 1: selectedBuildModeFlag = selectedBuildModeFlags[0]; break; + default: { + logError`Building multiple modes (${selectedBuildModeFlags.join(', ')}) at once not supported.`; + logError`Please specify one build mode.`; + return false; + } } const selectedBuildMode = @@ -334,16 +358,25 @@ async function main() { ? buildModes[selectedBuildModeFlag] : null); - // This is about to get a whole lot more stuff put in it. - const wikiData = { - listingSpec, - listingTargetSpec, - }; + const selectedTidyModes = + selectedTidyModeFlags + .map(flag => tidyModes[flag]); - const buildOptions = - (selectedBuildMode - ? selectedBuildMode.getCLIOptions() - : {}); + const tidyingOnly = + !selectedBuildMode && + !empty(selectedTidyModes); + + const selectedBuildModeOptions = + selectedBuildMode?.getCLIOptions?.() ?? + {}; + + const selectedTidyModeOptions = + selectedTidyModes.map(tidyMode => + tidyMode.getCLIOptions?.() ?? + {}); + + const selectedTidyModeOptionsFlat = + Object.fromEntries(selectedTidyModeOptions.flat()); const commonOptions = { 'help': { @@ -452,6 +485,11 @@ async function main() { type: 'flag', }, + 'skip-curated-url-validation': { + help: `Skips checking if URLs match a set of standardizing rules; only intended for use with old data`, + type: 'flag', + }, + 'skip-file-sizes': { help: `Skips preloading file sizes for images and additional files, which will be left blank in the build`, type: 'flag', @@ -580,9 +618,11 @@ async function main() { // here, even though we won't be doing anything with them later. // (This is a bit of a hack.) 
...buildModeFlagOptions, + ...tidyModeFlagOptions, ...commonOptions, - ...buildOptions, + ...selectedTidyModeOptionsFlat, + ...selectedBuildModeOptions, }); shouldShowStepStatusSummary = cliOptions['show-step-summary'] ?? false; @@ -599,7 +639,7 @@ async function main() { `and website content/structure ` + `from provided data, media, and language directories.\n` + `\n` + - `CLI options are divided into three groups:\n`)); + `CLI options are divided into five groups:\n`)); console.log(` 1) ` + indentWrap( `Common options: ` + @@ -608,37 +648,63 @@ async function main() { {spaces: 4, bullet: true})); console.log(` 2) ` + indentWrap( + `Tidying mode selection: ` + + `One or more tidying modes may be selected, ` + + `and they adjust the contents of data files ` + + `to satisfy predefined or data-configured standardization rules`, + {spaces: 4, bullet: true})); + + console.log(` 3) ` + indentWrap( + `Tidying mode options: ` + + `Each tidying mode may have a set of unique options which customize behavior for that tidying mode`)) + + console.log(` 4) ` + indentWrap( `Build mode selection: ` + `One build mode should be selected, ` + `and it decides the main set of behavior to use ` + `for presenting or interacting with site content`, {spaces: 4, bullet: true})); - console.log(` 3) ` + indentWrap( - `Build options: ` + + console.log(` 5) ` + indentWrap( + `Build mode options: ` + `Each build mode has a set of unique options ` + `which customize behavior for that build mode`, {spaces: 4, bullet: true})); + console.log(`All options may be specified in any order.`); + console.log(``); showHelpForOptions({ heading: `Common options`, options: commonOptions, - wrap, }); + showHelpForOptions({ + heading: `Tidying mode selection`, + options: tidyModeFlagOptions, + }); + + stitchArrays({ + flag: selectedTidyModeFlags, + options: selectedTidyModeOptions, + }).forEach(({flag, options}) => { + showHelpForOptions({ + heading: `Options for tidying mode --${flag}`, + options, + silentIfNoOptions: false, + }); + }); + showHelpForOptions({ heading: `Build mode
selection`, options: buildModeFlagOptions, - wrap, }); if (selectedBuildMode) { showHelpForOptions({ - heading: `Build options for --${selectedBuildModeFlag}`, - options: buildOptions, - wrap, + heading: `Options for build mode --${selectedBuildModeFlag}`, + options: selectedBuildModeOptions, }); } else { console.log( @@ -704,6 +770,34 @@ async function main() { }); } + if (tidyingOnly) { + Object.assign(stepStatusSummary.performBuild, { + status: STATUS_NOT_APPLICABLE, + annotation: `tidying modes provided`, + }); + + for (const key of [ + 'preloadFileSizes', + 'watchLanguageFiles', + 'verifyImagePaths', + 'buildSearchIndex', + 'generateThumbnails', + 'identifyWebRoutes', + 'checkWikiDataSourceFileSorting', + ]) { + Object.assign(stepStatusSummary[key], { + status: STATUS_NOT_APPLICABLE, + annotation: `tidying modes provided without build mode`, + }); + } + } + + if (cliOptions['skip-curated-url-validation']) { + logWarn`Won't check if any URLs match the curated URL rules this run`; + logWarn `(--skip-curated-url-validation passed).`; + disableCuratedURLValidation(); + } + // Finish setting up defaults by combining information from all options. const _fallbackStep = (stepKey, { @@ -960,6 +1054,10 @@ async function main() { break decideBuildSearchIndex; } + if (tidyingOnly) { + break decideBuildSearchIndex; + } + const indexFile = path.join(wikiCachePath, 'search', 'index.json') let stats; try { @@ -1478,6 +1576,12 @@ async function main() { timeStart: Date.now(), }); + // This is about to get a whole lot more stuff put in it. 
+ const wikiData = { + listingSpec, + listingTargetSpec, + }; + let yamlDataSteps; let yamlDocumentProcessingAggregate; @@ -1993,40 +2097,7 @@ async function main() { }); } - if (stepStatusSummary.sortWikiDataSourceFiles.status === STATUS_NOT_STARTED) { - Object.assign(stepStatusSummary.sortWikiDataSourceFiles, { - status: STATUS_STARTED_NOT_DONE, - timeStart: Date.now(), - }); - - const {SortingRule} = thingConstructors; - const results = - await Array.fromAsync(SortingRule.go({dataPath, wikiData})); - - if (results.some(result => result.changed)) { - logInfo`Updated data files to satisfy sorting.`; - logInfo`Restarting automatically, since that's now needed!`; - - Object.assign(stepStatusSummary.sortWikiDataSourceFiles, { - status: STATUS_DONE_CLEAN, - annotation: `changes cueing restart`, - timeEnd: Date.now(), - memory: process.memoryUsage(), - }); - - return 'restart'; - } else { - logInfo`All sorting rules are satisfied. Nice!`; - paragraph = false; - - Object.assign(stepStatusSummary.sortWikiDataSourceFiles, { - status: STATUS_DONE_CLEAN, - annotation: `no changes needed`, - timeEnd: Date.now(), - memory: process.memoryUsage(), - }); - } - } else if (stepStatusSummary.checkWikiDataSourceFileSorting.status === STATUS_NOT_STARTED) { + if (stepStatusSummary.checkWikiDataSourceFileSorting.status === STATUS_NOT_STARTED) { Object.assign(stepStatusSummary.checkWikiDataSourceFileSorting, { status: STATUS_STARTED_NOT_DONE, timeStart: Date.now(), @@ -3197,10 +3268,79 @@ async function main() { quickstat.reset(); + let restartBeforeBuild = false; + const updatedTidyModes = []; + + for (const [step, tidyMode] of [ + ['reformatCuratedURLs', 'format-urls'], + ['sortWikiDataSourceFiles', 'sort'], + ]) { + if (stepStatusSummary[step].status !== STATUS_NOT_STARTED) { + continue; + } + + Object.assign(stepStatusSummary[step], { + status: STATUS_STARTED_NOT_DONE, + timeStart: Date.now(), + }); + + const tidySignal = + await tidyModes[tidyMode].go({ + wikiData, + dataPath, + 
tidyingOnly, + }); + + switch (tidySignal) { + case 'clean': { + Object.assign(stepStatusSummary[step], { + status: STATUS_DONE_CLEAN, + annotation: `no changes needed`, + timeEnd: Date.now(), + memory: process.memoryUsage(), + }); + + break; + } + + case 'updated': { + Object.assign(stepStatusSummary[step], { + status: STATUS_DONE_CLEAN, + annotation: `changes cueing restart`, + timeEnd: Date.now(), + memory: process.memoryUsage(), + }); + + restartBeforeBuild = true; + updatedTidyModes.push(tidyMode); + + break; + } + + default: { + Object.assign(stepStatusSummary[step], { + status: STATUS_INVALID_SIGNAL, + annotation: `unknown: ${tidySignal}`, + timeEnd: Date.now(), + memory: process.memoryUsage(), + }); + + logError`Invalid exit signal for ${'--' + tidyMode}: ${tidySignal}`; + fileIssue(); + + return false; + } + } + } + if (stepStatusSummary.performBuild.status === STATUS_NOT_APPLICABLE) { return true; } + if (restartBeforeBuild) { + return 'restart'; + } + const developersComment = `