From cf471534f7a7a4d3b0752fc9c61e71388d14b1a1 Mon Sep 17 00:00:00 2001 From: "(quasar) nebula" Date: Tue, 21 Oct 2025 17:48:39 -0300 Subject: language: countWords() --- src/data/things/language.js | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/data/things/language.js b/src/data/things/language.js index 0354b6ac..91774761 100644 --- a/src/data/things/language.js +++ b/src/data/things/language.js @@ -4,7 +4,7 @@ import {withAggregate} from '#aggregate'; import CacheableObject from '#cacheable-object'; import {input} from '#composite'; import * as html from '#html'; -import {empty, withEntries} from '#sugar'; +import {accumulateSum, empty, withEntries} from '#sugar'; import {isLanguageCode} from '#validators'; import Thing from '#thing'; import {languageOptionRegex} from '#wiki-data'; @@ -106,6 +106,7 @@ export class Language extends Thing { intl_listUnit: this.#intlHelper(Intl.ListFormat, {type: 'unit'}), intl_pluralCardinal: this.#intlHelper(Intl.PluralRules, {type: 'cardinal'}), intl_pluralOrdinal: this.#intlHelper(Intl.PluralRules, {type: 'ordinal'}), + /* Word-granularity Intl.Segmenter helper; countWords reads it as this.intl_wordSegmenter. */intl_wordSegmenter: this.#intlHelper(Intl.Segmenter, {granularity: 'word'}), validKeys: { flags: {expose: true}, @@ -163,6 +164,15 @@ export class Language extends Thing { } } + /* Counts the word-like segments found in text. First asserts that the intl_wordSegmenter helper is available, then passes text through html.resolve with normalize: plain (presumably flattening html content to a plain string so markup is not counted - TODO confirm against #html), segments the result with the word segmenter, and returns the total computed by accumulateSum. */countWords(text) { + this.assertIntlAvailable('intl_wordSegmenter'); + + const string = html.resolve(text, {normalize: 'plain'}); + const segments = this.intl_wordSegmenter.segment(string); + + /* Each segment maps to 1 when its isWordLike flag is truthy and 0 otherwise; with word granularity this presumably leaves out whitespace and punctuation segments. NOTE(review): segments is an iterable rather than an array - confirm accumulateSum from #sugar accepts iterables. */return accumulateSum(segments, segment => segment.isWordLike ? 1 : 0); + } + getUnitForm(value) { this.assertIntlAvailable('intl_pluralCardinal'); return this.intl_pluralCardinal.select(value); -- cgit 1.3.0-6-gf8a5