/**
* @module data-transformers/helpers
* @description Low-level utility functions for the data transformation
* pipeline: URL sanitisation, Swedish-language span generation, date
* formatting, text cleaning, and document metadata helpers.
*
* @author Hack23 AB
* @license Apache-2.0
*/
import { escapeHtml } from '../html-utils.js';
import type { Language } from '../types/language.js';
import type { ContentLabelSet, CommitteeName } from '../types/content.js';
import { LOCALE_MAP, COMMITTEE_NAMES, CONTENT_LABELS } from './constants.js';
import type { RawCalendarEvent, RawDocument, CIAContext } from './types.js';
/**
 * Sanitize a URL for safe use in an HTML `href` attribute.
 *
 * Returns `'#'` when the input is missing, is not a string, or carries any
 * scheme other than `http:` / `https:`. Scheme-less (relative) URLs are
 * accepted. An accepted URL is trimmed and its HTML-significant characters
 * (`&`, `"`, `'`, `<`, `>`) are replaced by entities so the value cannot
 * break out of a quoted attribute.
 *
 * @param url - Candidate URL, possibly from untrusted data
 * @returns The escaped URL, or `'#'` when the URL is rejected
 */
export function sanitizeUrl(url: string | undefined | null): string {
  if (!url || typeof url !== 'string') return '#';
  const trimmed = url.trim();
  // Browsers ignore tab/CR/LF anywhere in a URL and leading control characters
  // when parsing it, so "java\tscript:" still executes. Run the scheme checks
  // on a copy with those characters removed.
  const probe = trimmed.replace(/[\t\n\r]/g, '').replace(/^[\u0000-\u0020]+/, '');
  // Block dangerous schemes explicitly
  if (/^(javascript|data|vbscript):/i.test(probe)) return '#';
  // Only allow http, https, and relative URLs (scheme syntax per RFC 3986)
  if (/^[a-z][a-z0-9+.-]*:/i.test(probe) && !/^https?:/i.test(probe)) return '#';
  // Escape HTML attribute characters; `&` must be handled first so the
  // entities produced by the later replacements are not double-escaped
  return trimmed
    .replace(/&/g, '&amp;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');
}
/**
 * Wrap already-escaped text in a `<span lang="sv">` element.
 *
 * The span always carries `lang="sv"`. When the article language is exactly
 * `'sv'` it additionally carries `data-translate="true"` (placed before the
 * `lang` attribute); for every other language that marker is left out.
 *
 * @param escapedText - Already HTML-escaped text content (inserted verbatim)
 * @param lang - Target article language (e.g. `'sv'`, `'en'`)
 * @returns The span markup as a string
 */
export function svSpan(escapedText: string, lang: Language | string): string {
  const translateMarker = lang === 'sv' ? 'data-translate="true" ' : '';
  return `<span ${translateMarker}lang="sv">${escapedText}</span>`;
}
/**
 * Look up a localized content label.
 *
 * Reads `key` from the label set registered for `lang` in `CONTENT_LABELS`;
 * when that language or key yields `undefined`, the English (`en`) entry for
 * the same key is returned instead.
 *
 * @param lang - Requested language code
 * @param key - Label key within a `ContentLabelSet`
 * @returns The label value for `lang`, or the English value as fallback
 */
export function L(lang: Language | string, key: string): ContentLabelSet[keyof ContentLabelSet] {
  const labelKey = key as keyof ContentLabelSet;
  const localized = CONTENT_LABELS[lang as Language]?.[labelKey];
  return localized !== undefined ? localized : CONTENT_LABELS.en[labelKey];
}
/**
 * Report whether `date` falls on the current calendar day.
 *
 * Year, month and day-of-month are compared using the local-time getters of
 * `Date`, against a `Date` created at call time.
 *
 * @param date - Date to test
 * @returns `true` when year, month and day all match today's
 */
export function isTodayDate(date: Date): boolean {
  const now = new Date();
  if (date.getFullYear() !== now.getFullYear()) return false;
  if (date.getMonth() !== now.getMonth()) return false;
  return date.getDate() === now.getDate();
}
/**
 * Format the full weekday name of `date` (e.g. "Monday") via `Intl`.
 *
 * The locale is taken from `LOCALE_MAP[lang]`, falling back to `lang` itself
 * when the map has no entry. If formatting with that locale throws, the
 * weekday is formatted with `en-GB` instead.
 *
 * @param date - Date whose weekday is wanted
 * @param lang - Language code; defaults to `'en'`
 * @returns The long weekday name
 */
export function formatDayName(date: Date, lang: Language | string = 'en'): string {
  const resolvedLocale = LOCALE_MAP[lang] || lang;
  const weekdayIn = (localeTag: string): string =>
    new Intl.DateTimeFormat(localeTag, { weekday: 'long' }).format(date);
  try {
    return weekdayIn(resolvedLocale);
  } catch {
    // Unsupported or malformed locale tag: retry with a known-good locale
    return weekdayIn('en-GB');
  }
}
/**
 * Build a day label of the form `"<month and day> - <weekday>"`.
 *
 * The weekday comes from `formatDayName`; the month/day part is produced by
 * `Intl.DateTimeFormat` with `{ month: 'long', day: 'numeric' }` in the locale
 * resolved from `LOCALE_MAP[lang]` (or `lang` itself when unmapped). If either
 * step throws, the whole label is rebuilt in English (`'en'` / `en-GB`).
 *
 * @param date - Date to label
 * @param lang - Language code; defaults to `'en'`
 * @returns The combined label string
 */
export function formatDayLabel(date: Date, lang: Language | string = 'en'): string {
  const resolvedLocale = LOCALE_MAP[lang] || lang;
  // Weekday is computed before the month/day part, in both attempts
  const compose = (weekdayLang: Language | string, monthLocale: string): string => {
    const weekday = formatDayName(date, weekdayLang);
    const monthAndDay = new Intl.DateTimeFormat(monthLocale, { month: 'long', day: 'numeric' }).format(date);
    return `${monthAndDay} - ${weekday}`;
  };
  try {
    return compose(lang, resolvedLocale);
  } catch {
    return compose('en', 'en-GB');
  }
}
/**
 * Decide whether a calendar event counts as high priority.
 *
 * The event's `title` (or `rubrik` when `title` is empty) is lower-cased and
 * searched for a fixed keyword list. Matching is plain substring search, so a
 * short keyword such as "pm" or "eu" also matches inside longer words.
 *
 * @param event - Raw calendar event
 * @returns `true` when any keyword occurs in the title
 */
export function isHighPriority(event: RawCalendarEvent): boolean {
  const haystack = (event.title || event.rubrik || '').toLowerCase();
  const keywords = [
    'pm',
    'prime minister',
    'statsminister',
    'vote',
    'votering',
    'eu',
    'summit',
  ];
  return keywords.some((keyword) => haystack.includes(keyword));
}
/**
 * Extract the author and party from raw Swedish motion text.
 *
 * Looks for the word "av", followed by the author text, followed by a
 * parenthesised party code of one to five upper-case letters (A–Z, Å, Ä, Ö),
 * e.g. "av Fredrik Olovsson m.fl. (S)". Runs of whitespace inside the author
 * are collapsed to single spaces.
 *
 * @param text - Raw motion text
 * @returns `{ author, party }` for the first match, or `null` when none
 */
export function parseMotionAuthorParty(text: string): { author: string; party: string } | null {
  const found = text.match(/\bav\s+([^(]+?)\s+\(([A-ZÅÄÖ]{1,5})\)/u);
  if (!found) {
    return null;
  }
  const [, rawAuthor, partyCode] = found;
  const author = rawAuthor.trim().replace(/\s+/g, ' ');
  return { author, party: partyCode };
}
/**
 * Reduce raw Swedish motion text to a short readable subject.
 *
 * Three removals are applied in order, trimming after each:
 * 1. everything from "Förslag till riksdagsbeslut" onwards,
 * 2. a leading "Motion till riksdagen <id> av <author> (<PARTY>)" prefix,
 * 3. a leading "med anledning av prop. <id>" prefix.
 *
 * If more than 20 characters remain, the first 300 of them are returned;
 * otherwise the first 200 characters of the untouched input are returned.
 *
 * @param raw - Raw motion text
 * @returns The cleaned excerpt, or a raw excerpt when cleaning leaves too little
 */
export function cleanMotionText(raw: string): string {
  const minUsefulLength = 20;
  const cleanedLimit = 300;
  const rawLimit = 200;

  const strippers: RegExp[] = [
    // Formal ballot section and everything after it
    /Förslag till riksdagsbeslut[\s\S]*/i,
    // Leading "Motion till riksdagen YYYY/YY:NNN av AUTHOR (PARTY) " prefix
    /^Motion till riksdagen\s+\S+\s+av\s+[^(]+\([A-ZÅÄÖ]{1,5}\)\s*/i,
    // Leading "med anledning av prop. YYYY/YY:NNN " prefix
    /^med anledning av prop\.\s+\S+\s*/i,
  ];
  const cleaned = strippers.reduce(
    (current, pattern) => current.replace(pattern, '').trim(),
    raw
  );

  if (cleaned.length > minUsefulLength) {
    return cleaned.slice(0, cleanedLimit);
  }
  return raw.slice(0, rawLimit);
}
/**
 * Detect text that is an MP/politician profile excerpt rather than document
 * content.
 *
 * After leading whitespace is removed, the text is classified as a profile
 * excerpt when it starts with one of these Swedish status phrases:
 * - "Tjänstgörande riksdagsledamot", "Tidigare riksdagsledamot",
 *   "Avgången riksdagsledamot"
 * - "Tillgänglig ersättare", "Tjänstgörande ersättare", "Tidigare ersättare"
 * - "Tjänstgörande statsrådsersättare", "Tidigare statsråd",
 *   "Tidigare statsminister"
 * - "Inga uppdrag", "Avgången"
 * - "Avliden" followed by a YYYY-MM-DD date
 *
 * or when it contains, anywhere, an `@riksdagen.se` e-mail address or the
 * phrase "Aktuella uppdrag Riksdagsledamot".
 *
 * @param text - Text to classify
 * @returns `true` for profile excerpts; `false` otherwise, including for empty input
 */
export function isPersonProfileText(text: string): boolean {
  if (!text) return false;
  const candidate = text.trimStart();

  // Phrases that identify a profile when they open the text
  const leadingMarkers: RegExp[] = [
    /^Tjänstgörande riksdagsledamot/u,
    /^Tidigare riksdagsledamot/u,
    /^Avgången riksdagsledamot/u,
    /^Tillgänglig ersättare/u,
    /^Tjänstgörande ersättare/u,
    /^Tidigare ersättare/u,
    /^Tjänstgörande statsrådsersättare/u,
    /^Tidigare statsråd/u,
    /^Tidigare statsminister/u,
    /^Inga uppdrag/u,
    /^Avgången/u,
    /^Avliden\s+\d{4}-\d{2}-\d{2}/u,
  ];
  // Markers that identify a profile wherever they occur
  const embeddedMarkers: RegExp[] = [
    /[a-zA-Z0-9._%+-]+@riksdagen\.se/u,
    /Aktuella uppdrag\s+Riksdagsledamot/u,
  ];

  return [...leadingMarkers, ...embeddedMarkers].some((marker) => marker.test(candidate));
}
/**
 * Build a one-sentence proposition summary from the originating ministry.
 *
 * The ministry name is matched exactly against a fixed table of Swedish
 * ministry names. Only the table's own keys are considered, so names that
 * happen to exist on `Object.prototype` (e.g. `"constructor"`) are treated as
 * unknown.
 *
 * @param organ - Ministry name as it appears in the document metadata
 * @param lang - Article language; `'sv'` selects the Swedish sentence, any
 *   other value selects the English one
 * @returns The framing sentence, or `''` when the ministry is not in the table
 */
export function propSummaryFromOrgan(organ: string, lang: Language | string): string {
  const ministryMap: Record<string, { sv: string; en: string }> = {
    Justitiedepartementet: { sv: 'Justitiedepartementets förslag rör rättsliga förändringar.', en: 'This Justice Ministry proposal amends existing legal framework.' },
    Finansdepartementet: { sv: 'Finansdepartementets förslag påverkar statsbudget eller finansreglering.', en: 'This Finance Ministry proposal has fiscal or budgetary implications.' },
    Försvarsdepartementet: { sv: 'Försvarsdepartementets förslag rör försvars- eller säkerhetspolitik.', en: 'This Defence Ministry proposal concerns national security or defence posture.' },
    Utbildningsdepartementet: { sv: 'Utbildningsdepartementets förslag berör skolsystem eller forskning.', en: 'This Education Ministry proposal affects schools, universities or research funding.' },
    Socialdepartementet: { sv: 'Socialdepartementets förslag rör välfärd eller socialpolitik.', en: 'This Social Affairs Ministry proposal affects welfare entitlements or social services.' },
    Miljödepartementet: { sv: 'Klimat- och miljödepartementets förslag rör klimat- eller miljöpolitik.', en: 'This Climate and Environment Ministry proposal targets emissions or ecological regulation.' },
    'Klimat- och miljödepartementet': { sv: 'Klimat- och miljödepartementets förslag rör klimat- eller miljöpolitik.', en: 'This Climate and Environment Ministry proposal targets emissions or ecological regulation.' },
    'Klimat- och näringslivsdepartementet': { sv: 'Klimat- och näringslivsdepartementets förslag rör klimat- och näringspolitik.', en: 'This Climate and Enterprise Ministry proposal addresses both environmental and industrial policy.' },
    Utrikesdepartementet: { sv: 'Utrikesdepartementets förslag rör utrikespolitik eller internationella relationer.', en: 'This Foreign Affairs Ministry proposal concerns international relations or Sweden’s global obligations.' },
    Infrastrukturdepartementet: { sv: 'Infrastrukturdepartementets förslag rör transport eller samhällsinfrastruktur.', en: 'This Infrastructure Ministry proposal affects transport networks or public utilities.' },
  };
  // Own-property check: a plain object lookup would also resolve inherited
  // keys such as "constructor" or "toString" to non-entry values.
  const entry = Object.prototype.hasOwnProperty.call(ministryMap, organ)
    ? ministryMap[organ]
    : undefined;
  if (!entry) return '';
  return lang === 'sv' ? entry.sv : entry.en;
}
/**
 * Produce a summary line for a document, preferring supplied text and falling
 * back to a sentence assembled from metadata.
 *
 * Resolution order:
 * 1. If `doc.summary` or `doc.notis` is present and is not person-profile text
 *    (see `isPersonProfileText`), it is returned. For `type === 'motion'`,
 *    text containing "Motion till riksdagen" or "Förslag till riksdagsbeslut"
 *    is first passed through `cleanMotionText`.
 * 2. Otherwise a summary is built from metadata, depending on `type`:
 *    - `'report'` with an organ: organ + committee-report label, plus subtype
 *    - `'proposition'`: the ministry sentence from `propSummaryFromOrgan` when
 *      available, else the proposition label plus "referred to <organ>"
 *    - `'motion'`: "motion by <author> (<party>)" with whatever of the two is
 *      known, plus subtype
 *    The raw document type is appended in parentheses when it differs from
 *    `type`, and the parts are joined with spaces and terminated with ".".
 * 3. If no part could be built, the per-type default label is returned.
 *
 * @param doc - Raw document record
 * @param type - Normalised document type (`'report'`, `'proposition'`, `'motion'`, …)
 * @param lang - Article language used for label lookup
 * @returns The summary text
 */
export function generateEnhancedSummary(doc: RawDocument, type: string, lang: Language | string): string {
  // Localized label as a plain string; non-string label values become ''
  const label = (key: string): string => {
    const value = L(lang, key);
    return typeof value === 'string' ? value : '';
  };

  // Prefer real summary/notis text unless it is person-profile data
  // (e.g. "Tjänstgörande riksdagsledamot...", "Avliden 2011-09-20...")
  const provided = doc.summary || doc.notis || '';
  if (provided && !isPersonProfileText(provided)) {
    const looksLikeRawMotion =
      provided.includes('Motion till riksdagen') || provided.includes('Förslag till riksdagsbeslut');
    // For motions: clean raw Swedish notis text before returning
    if (type === 'motion' && looksLikeRawMotion) {
      return cleanMotionText(provided);
    }
    return provided;
  }

  // Generate enhanced summary based on metadata
  const organ = doc.organ || doc.committee;
  const subtyp = doc.subtyp || doc.subtype;
  const doktyp = doc.doktyp || doc.documentType;

  const parts: string[] = [];
  if (type === 'report' && organ) {
    parts.push(`${organ} ${label('committeeReport')}`);
    if (subtyp) {
      parts.push(`${label('on')} ${subtyp}`);
    }
  } else if (type === 'proposition') {
    // Try ministry-specific framing first
    const ministrySummary = organ ? propSummaryFromOrgan(organ, lang) : '';
    if (ministrySummary) {
      return ministrySummary;
    }
    parts.push(label('governmentProposition'));
    if (organ) {
      parts.push(`${label('referredTo')} ${organ}`);
    }
  } else if (type === 'motion') {
    // 'Unknown' is a sentinel in the raw data and is treated as missing
    const author = (doc.intressent_namn !== 'Unknown' ? doc.intressent_namn : null) || doc.author;
    const party = doc.parti !== 'Unknown' ? doc.parti : undefined;
    if (author && party) {
      parts.push(`${label('motionBy')} ${author} (${party})`);
    } else if (author) {
      parts.push(`${label('motionBy')} ${author}`);
    } else {
      parts.push(label('parliamentaryMotion'));
    }
    if (subtyp) {
      parts.push(`${label('on')} ${subtyp}`);
    }
  }

  // Add document type information if useful
  if (doktyp && doktyp !== type) {
    parts.push(`(${doktyp})`);
  }

  // Fallback to default if no useful metadata
  if (parts.length === 0) {
    const fallbackKey =
      type === 'report' ? 'reportDefault' :
      type === 'proposition' ? 'propDefault' :
      'motionDefault';
    return label(fallbackKey);
  }
  return parts.join(' ') + '.';
}
/**
 * Resolve a committee code to a human-readable name.
 *
 * - Missing/empty code: the localized `unknown` label (or `'Unknown'`).
 * - The literal code `'unknown'`: the localized `otherCommittee` label
 *   (or `'Other committees'`).
 * - A code present in `COMMITTEE_NAMES`: its Swedish name when `lang` is
 *   `'sv'`, its English name for every other language.
 * - Any other code: the code itself, unchanged.
 *
 * @param code - Committee code, if known
 * @param lang - Article language
 * @returns The display name for the committee
 */
export function getCommitteeName(code: string | undefined, lang: Language | string): string {
  if (!code) {
    const unknownVal = L(lang, 'unknown');
    return typeof unknownVal === 'string' ? unknownVal : 'Unknown';
  }
  if (code === 'unknown') {
    const otherVal = L(lang, 'otherCommittee');
    return typeof otherVal === 'string' ? otherVal : 'Other committees';
  }
  const entry: CommitteeName | undefined = COMMITTEE_NAMES[code];
  if (!entry) return code;
  // Only Swedish and English names are stored; non-Swedish articles get English
  return lang === 'sv' ? entry.sv : entry.en;
}
/**
 * Extract a leading excerpt from full document text.
 *
 * HTML tags are replaced by spaces and all whitespace runs are collapsed, so
 * the result is a single line of plain text. Text that already fits within
 * `maxChars` is returned whole. Longer text is cut after the last "." found
 * at or before index `maxChars`, provided that period lies beyond index 100;
 * otherwise the first `maxChars` characters are returned followed by "…".
 *
 * @param fullText - Document text, possibly containing HTML markup
 * @param maxChars - Target excerpt length (default 600); the result may
 *   exceed it by one character (the closing period or the ellipsis)
 * @returns The excerpt, or `''` when `fullText` is empty or missing
 */
export function extractKeyPassage(fullText: string | undefined, maxChars = 600): string {
  if (!fullText) return '';
  // Strip HTML tags if present, then normalise whitespace
  const plain = fullText.replace(/<[^>]+>/g, ' ').replace(/\s+/g, ' ').trim();
  if (plain.length <= maxChars) return plain;
  // Find a sentence boundary near maxChars
  const cut = plain.lastIndexOf('.', maxChars);
  // A boundary within the first ~100 characters would leave too little text,
  // so fall back to a hard cut with an ellipsis in that case
  return cut > 100 ? plain.slice(0, cut + 1) : plain.slice(0, maxChars) + '…';
}
/**
 * Normalise a raw `parti` value to a canonical party key.
 *
 * Non-string values, empty or whitespace-only strings, and any capitalisation
 * of "unknown" all map to `'other'`. Every other string is returned trimmed,
 * with its original casing.
 *
 * @param parti - Raw party value of unknown type
 * @returns The trimmed party key, or `'other'` for missing/unknown values
 */
export function normalizePartyKey(parti: unknown): string {
  if (typeof parti !== 'string') {
    return 'other';
  }
  const key = parti.trim();
  if (key === '' || key.toLowerCase() === 'unknown') {
    return 'other';
  }
  return key;
}
/**
 * Look up a party's motion success rate in the CIA context.
 *
 * An entry whose `id` equals `party` exactly always wins. Only when no such
 * entry exists is the first entry whose `partyName` starts with `party`
 * (case-insensitively) used. Checking ids first prevents a short code such as
 * "S" from resolving to an earlier entry like "Sverigedemokraterna" merely
 * because its name begins with the same letter.
 *
 * @param party - Party code or name prefix
 * @param cia - CIA context holding `partyPerformance` entries
 * @returns The matched entry's `metrics.successRate`, or `null` when the
 *   party or context is missing or no entry matches
 */
export function partyMotionSuccessRate(party: string | undefined, cia: CIAContext | undefined): number | null {
  if (!cia || !party) return null;
  const wantedPrefix = party.toLowerCase();
  const match =
    cia.partyPerformance.find(entry => entry.id === party) ??
    cia.partyPerformance.find(entry => entry.partyName.toLowerCase().startsWith(wantedPrefix));
  return match ? match.metrics.successRate : null;
}
|