content, replacer: match inline links, auto-provide custom label

Changes in matchMarkdownLinks here are refactoring only, not new behavior.
author: (quasar) nebula <qznebula@protonmail.com> 2025-09-03 16:55:19 -0300
committer: (quasar) nebula <qznebula@protonmail.com> 2025-09-03 16:56:06 -0300
commit: c7f6383e34c30b63e0e0b86f320f42f2c9a0bdb7 (patch)
tree: 9fe61e6ccb8afd6c474cf667634d2c7f504247a0
parent: 0c70a112914f7eb4bea0d839d44060cb30f4f30e (diff)
3 files changed, 99 insertions, 16 deletions
diff --git a/src/common-util/wiki-data.js b/src/common-util/wiki-data.js
index cb024022..e0f41ee1 100644
--- a/src/common-util/wiki-data.js
+++ b/src/common-util/wiki-data.js
@@ -535,24 +535,46 @@ export function combineWikiDataArrays(arrays) {
 export function* matchMarkdownLinks(markdownSource, {marked}) {
   const plausibleLinkRegexp = /\[(?=.*?\))/g;
 
+  // Pedantic rules use more particular parentheses detection in link
+  // destinations - they allow one level of balanced parentheses, and
+  // otherwise, parentheses must be escaped. This allows for entire links
+  // to be wrapped in parentheses, e.g. below:
+  //
+  //   This is so cool. ([You know??](https://example.com))
+  //
+  const definiteLinkRegexp = marked.Lexer.rules.inline.pedantic.link;
+
   let plausibleMatch = null;
   while (plausibleMatch = plausibleLinkRegexp.exec(markdownSource)) {
-    // Pedantic rules use more particular parentheses detection in link
-    // destinations - they allow one level of balanced parentheses, and
-    // otherwise, parentheses must be escaped. This allows for entire links
-    // to be wrapped in parentheses, e.g below:
-    //
-    //   This is so cool. ([You know??](https://example.com))
-    //
     const definiteMatch =
-      marked.Lexer.rules.inline.pedantic.link
-        .exec(markdownSource.slice(plausibleMatch.index));
+      definiteLinkRegexp.exec(markdownSource.slice(plausibleMatch.index));
 
-    if (definiteMatch) {
-      const [{length}, label, href] = definiteMatch;
-      const index = plausibleMatch.index + definiteMatch.index;
+    if (!definiteMatch) {
+      continue;
+    }
+
+    const [{length}, label, href] = definiteMatch;
+    const index = plausibleMatch.index + definiteMatch.index;
 
-      yield {label, href, index, length};
+    yield {label, href, index, length};
+  }
+}
+
+export function* matchInlineLinks(source) {
+  const plausibleLinkRegexp = /\b[a-z]*:\/\/[^ ]*?(?=(?:[,.!?]*)(?:\s|$))/gm;
+
+  let plausibleMatch = null;
+  while (plausibleMatch = plausibleLinkRegexp.exec(source)) {
+    const [href] = plausibleMatch;
+    const {index} = plausibleMatch;
+    const [{length}] = plausibleMatch;
+
+    try {
+      new URL(href);
+    } catch {
+      continue;
     }
+
+    yield {href, length, index};
   }
 }
diff --git a/src/content/dependencies/transformContent.js b/src/content/dependencies/transformContent.js
index e9a75744..a6639acd 100644
--- a/src/content/dependencies/transformContent.js
+++ b/src/content/dependencies/transformContent.js
@@ -601,9 +601,12 @@ export default {
           }
 
           case 'external-link': {
-            const {label} = node.data;
             const externalLink = relations.externalLinks[externalLinkIndex++];
 
+            const label =
+              node.data.label ??
+              node.data.href.replace(/^https?:\/\//, '');
+
             if (slots.textOnly) {
               return {type: 'text', data: label};
             }
diff --git a/src/replacer.js b/src/replacer.js
index 779ee78d..78f3e104 100644
--- a/src/replacer.js
+++ b/src/replacer.js
@@ -9,7 +9,7 @@ import * as marked from 'marked';
 
 import * as html from '#html';
 import {empty, escapeRegex, typeAppearance} from '#sugar';
-import {matchMarkdownLinks} from '#wiki-data';
+import {matchInlineLinks, matchMarkdownLinks} from '#wiki-data';
 
 export const replacerSpec = {
   'album': {
@@ -794,7 +794,7 @@ export function postprocessSummaries(inputNodes) {
 }
 
 export function postprocessExternalLinks(inputNodes) {
-  const outputNodes = [];
+  let outputNodes = [];
 
   for (const node of inputNodes) {
     if (node.type !== 'text') {
@@ -850,6 +850,64 @@ export function postprocessExternalLinks(inputNodes) {
     }
   }
 
+  // Repeat the same pass for inline links: bare URLs that appear on their
+  // own, not formatted as Markdown links. These have no provided label, so
+  // the label is filled in automatically by content code.
+
+  inputNodes = outputNodes;
+  outputNodes = [];
+
+  for (const node of inputNodes) {
+    if (node.type !== 'text') {
+      outputNodes.push(node);
+      continue;
+    }
+
+    let textNode = {
+      i: node.i,
+      iEnd: null,
+      type: 'text',
+      data: '',
+    };
+
+    let parseFrom = 0;
+    for (const match of matchInlineLinks(node.data)) {
+      const {href, index, length} = match;
+
+      textNode.data += node.data.slice(parseFrom, index);
+
+      if (textNode.data) {
+        textNode.iEnd = textNode.i + textNode.data.length;
+        outputNodes.push(textNode);
+
+        textNode = {
+          i: node.i + index + length,
+          iEnd: null,
+          type: 'text',
+          data: '',
+        };
+      }
+
+      outputNodes.push({
+        i: node.i + index,
+        iEnd: node.i + index + length,
+        type: 'external-link',
+        data: {label: null, href},
+      });
+
+      parseFrom = index + length;
+    }
+
+    if (parseFrom !== node.data.length) {
+      textNode.data += node.data.slice(parseFrom);
+      textNode.iEnd = node.iEnd;
+    }
+
+    if (textNode.data) {
+      outputNodes.push(textNode);
+    }
+  }
+
   return outputNodes;
 }
author	(quasar) nebula <qznebula@protonmail.com>	2025-09-03 16:55:19 -0300
committer	(quasar) nebula <qznebula@protonmail.com>	2025-09-03 16:56:06 -0300
commit	c7f6383e34c30b63e0e0b86f320f42f2c9a0bdb7 (patch)
tree	9fe61e6ccb8afd6c474cf667634d2c7f504247a0
parent	0c70a112914f7eb4bea0d839d44060cb30f4f30e (diff)