From 928a8247048d3476d46b0e8817f11fc5b068506a Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Thu, 1 May 2025 16:50:50 -0300 Subject: data: ContentEntry: implicit references via artistText --- src/common-util/wiki-data.js | 7 +- .../composite/things/content/contentArtists.js | 39 ++++++++++ .../withExpressedOrImplicitArtistReferences.js | 60 +++++++++++++++ src/data/composite/things/content/index.js | 1 + src/data/things/content.js | 6 +- src/data/yaml.js | 90 ++++++++++++++++------ 6 files changed, 173 insertions(+), 30 deletions(-) create mode 100644 src/data/composite/things/content/contentArtists.js create mode 100644 src/data/composite/things/content/helpers/withExpressedOrImplicitArtistReferences.js diff --git a/src/common-util/wiki-data.js b/src/common-util/wiki-data.js index b21af786..546f1ad9 100644 --- a/src/common-util/wiki-data.js +++ b/src/common-util/wiki-data.js @@ -57,10 +57,9 @@ export function getKebabCase(name) { // Specific data utilities -// Matches heading details from commentary data in roughly the formats: +// Matches heading details from commentary data in roughly the format: // -// <i>artistReference:</i> (annotation, date) -// <i>artistReference|artistText:</i> (annotation, date) +// <i>artistText:</i> (annotation, date) // // where capturing group "annotation" can be any text at all, except that the // last entry (past a comma or the only content within parentheses), if parsed @@ -95,7 +94,7 @@ const dateRegex = groupName => String.raw`(?<${groupName}>[a-zA-Z]+ [0-9]{1,2}, [0-9]{4,4}|[0-9]{1,2} [^,]*[0-9]{4,4}|[0-9]{1,4}[-/][0-9]{1,4}[-/][0-9]{1,4})`; const commentaryRegexRaw = - String.raw`^<i>(?<artistReferences>.+?)(?:\|(?<artistText>.+))?:<\/i>(?: \((?<annotation>(?:.*?(?=,|\)[^)]*$))*?)(?:,? ?(?:(?<dateKind>sometime|throughout|around) )?${dateRegex('date')}(?: ?- ?${dateRegex('secondDate')})?(?: (?<accessKind>captured|accessed) ${dateRegex('accessDate')})?)?\))?`; + String.raw`^<i>(?<artistText>.+?):<\/i>(?: \((?<annotation>(?:.*?(?=,|\)[^)]*$))*?)(?:,? 
?(?:(?<dateKind>sometime|throughout|around) )?${dateRegex('date')}(?: ?- ?${dateRegex('secondDate')})?(?: (?<accessKind>captured|accessed) ${dateRegex('accessDate')})?)?\))?`; export const commentaryRegexCaseInsensitive = new RegExp(commentaryRegexRaw, 'gmi'); export const commentaryRegexCaseSensitive = diff --git a/src/data/composite/things/content/contentArtists.js b/src/data/composite/things/content/contentArtists.js new file mode 100644 index 00000000..77273047 --- /dev/null +++ b/src/data/composite/things/content/contentArtists.js @@ -0,0 +1,39 @@ +import {input, templateCompositeFrom} from '#composite'; +import {validateReferenceList} from '#validators'; + +import {exitWithoutDependency, exposeDependency} + from '#composite/control-flow'; +import {withResolvedReferenceList} from '#composite/wiki-data'; +import {soupyFind} from '#composite/wiki-properties'; + +import withExpressedOrImplicitArtistReferences + from './helpers/withExpressedOrImplicitArtistReferences.js'; + +export default templateCompositeFrom({ + annotation: `contentArtists`, + + compose: false, + + update: { + validate: validateReferenceList('artist'), + }, + + steps: () => [ + withExpressedOrImplicitArtistReferences({ + from: input.updateValue(), + }), + + exitWithoutDependency({ + dependency: '#artistReferences', + }), + + withResolvedReferenceList({ + list: '#artistReferences', + find: soupyFind.input('artist'), + }), + + exposeDependency({ + dependency: '#resolvedReferenceList', + }), + ], +}); diff --git a/src/data/composite/things/content/helpers/withExpressedOrImplicitArtistReferences.js b/src/data/composite/things/content/helpers/withExpressedOrImplicitArtistReferences.js new file mode 100644 index 00000000..62799d43 --- /dev/null +++ b/src/data/composite/things/content/helpers/withExpressedOrImplicitArtistReferences.js @@ -0,0 +1,60 @@ +import {input, templateCompositeFrom} from '#composite'; + +import {raiseOutputWithoutDependency} from '#composite/control-flow'; +import {withFilteredList, withMappedList} 
from '#composite/data'; +import {withContentNodes} from '#composite/wiki-data'; + +export default templateCompositeFrom({ + annotation: `withExpressedOrImplicitArtistReferences`, + + inputs: { + from: input({type: 'array', acceptsNull: true}), + }, + + outputs: ['#artistReferences'], + + steps: () => [ + { + dependencies: [input('from')], + compute: (continuation, { + [input('from')]: expressedArtistReferences, + }) => + (expressedArtistReferences + ? continuation.raiseOutput({'#artistReferences': expressedArtistReferences}) + : continuation()), + }, + + raiseOutputWithoutDependency({ + dependency: 'artistText', + output: input.value({'#artistReferences': null}), + }), + + withContentNodes({ + from: 'artistText', + }), + + withMappedList({ + list: '#contentNodes', + map: input.value(node => + node.type === 'tag' && + node.data.replacerKey?.data === 'artist'), + }).outputs({ + '#mappedList': '#artistTagFilter', + }), + + withFilteredList({ + list: '#contentNodes', + filter: '#artistTagFilter', + }).outputs({ + '#filteredList': '#artistTags', + }), + + withMappedList({ + list: '#artistTags', + map: input.value(node => + node.data.replacerValue[0].data), + }).outputs({ + '#mappedList': '#artistReferences', + }), + ], +}); diff --git a/src/data/composite/things/content/index.js b/src/data/composite/things/content/index.js index eeedcdcd..b03db684 100644 --- a/src/data/composite/things/content/index.js +++ b/src/data/composite/things/content/index.js @@ -1,3 +1,4 @@ +export {default as contentArtists} from './contentArtists.js'; export {default as hasAnnotationPart} from './hasAnnotationPart.js'; export {default as withAnnotationParts} from './withAnnotationParts.js'; export {default as withSourceText} from './withSourceText.js'; diff --git a/src/data/things/content.js b/src/data/things/content.js index 7d7f7a6d..60b2c8e0 100644 --- a/src/data/things/content.js +++ b/src/data/things/content.js @@ -16,6 +16,7 @@ import { } from '#composite/control-flow'; import { + 
contentArtists, hasAnnotationPart, withAnnotationParts, withSourceText, @@ -29,10 +30,7 @@ export class ContentEntry extends Thing { thing: thing(), - artists: referenceList({ - class: input.value(Artist), - find: soupyFind.input('artist'), - }), + artists: contentArtists(), artistText: contentString(), diff --git a/src/data/yaml.js b/src/data/yaml.js index c6600121..f3f422f7 100644 --- a/src/data/yaml.js +++ b/src/data/yaml.js @@ -8,6 +8,7 @@ import {inspect as nodeInspect} from 'node:util'; import yaml from 'js-yaml'; import {colors, ENABLE_COLOR, logInfo, logWarn} from '#cli'; +import {parseInput, splitContentNodesAround} from '#replacer'; import {sortByName} from '#sort'; import Thing from '#thing'; import thingConstructors from '#things'; @@ -828,36 +829,81 @@ export function parseArtwork({ } export function parseContentEntries(thingClass, sourceText, {subdoc}) { - const map = matchEntry => ({ - 'Artists': - matchEntry.artistReferences - .split(',') - .map(ref => ref.trim()), + function map(matchEntry) { + let artistText = null, artistReferences = null; + + const artistTextNodes = + Array.from( + splitContentNodesAround( + parseInput(matchEntry.artistText), + /\|/g)); + + const separatorIndices = + artistTextNodes + .filter(node => node.type === 'separator') + .map(node => artistTextNodes.indexOf(node)); + + if (empty(separatorIndices)) { + if (artistTextNodes.length === 1 && artistTextNodes[0].type === 'text') { + artistReferences = matchEntry.artistText; + } else { + artistText = matchEntry.artistText; + } + } else { + const firstSeparatorIndex = + separatorIndices.at(0); + + const secondSeparatorIndex = + separatorIndices.at(1) ?? 
+ artistTextNodes.length; + + artistReferences = + matchEntry.artistText.slice( + artistTextNodes.at(0).i, + artistTextNodes.at(firstSeparatorIndex - 1).iEnd); + + artistText = + matchEntry.artistText.slice( + artistTextNodes.at(firstSeparatorIndex).iEnd, + artistTextNodes.at(secondSeparatorIndex - 1).iEnd); + } - 'Artist Text': - matchEntry.artistText, + if (artistReferences) { + artistReferences = + artistReferences + .split(',') + .map(ref => ref.trim()); + } - 'Annotation': - matchEntry.annotation, + return { + 'Artists': + artistReferences, - 'Date': - matchEntry.date, + 'Artist Text': + artistText, - 'Second Date': - matchEntry.secondDate, + 'Annotation': + matchEntry.annotation, - 'Date Kind': - matchEntry.dateKind, + 'Date': + matchEntry.date, - 'Access Date': - matchEntry.accessDate, + 'Second Date': + matchEntry.secondDate, - 'Access Kind': - matchEntry.accessKind, + 'Date Kind': + matchEntry.dateKind, - 'Body': - matchEntry.body, - }); + 'Access Date': + matchEntry.accessDate, + + 'Access Kind': + matchEntry.accessKind, + + 'Body': + matchEntry.body, + }; + } const documents = matchContentEntries(sourceText) -- cgit 1.3.0-6-gf8a5