All files / scripts/data-transformers document-analysis.ts

83.23% Statements 144/173
75.6% Branches 155/205
56.41% Functions 22/39
83.53% Lines 137/164

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389                                                        16x   16x                     101x     101x 101x 3x 98x 3x   95x       101x 101x   101x         101x 101x     101x 39x       101x 101x 98x     101x       16x     16x               102x 102x                     59x 59x 59x 102x 102x 102x 6x 6x   96x     59x               36x 36x 56x 56x 56x   36x       21x 21x 47x 47x 47x     21x 42x 21x   21x   21x     21x 21x 26x   21x   21x 21x     21x       21x 21x     21x   21x             21x 1x       2x 2x     1x   1x       21x 21x     21x 21x          
 21x 21x   21x   21x 21x 21x 21x 21x 1x       2x 2x     1x   1x       21x 21x         21x             102x 102x 102x     102x         102x 102x     102x     102x 83x 83x 83x 4x 4x     102x 102x         102x 102x   102x   102x       102x 102x   102x                                 126x     126x           126x     126x   126x     126x 126x 39x 39x       87x       126x 126x     126x 126x                       126x   3x 3x         3x 3x 3x                 126x 101x 101x 101x     101x           101x 2x                   126x    
/**
 * @module data-transformers/document-analysis
 * @description Document grouping, opposition strategy analysis, and
 * per-document intelligence analysis. Handles motion-to-proposition
 * mapping, party activity breakdown, and speech-enriched analysis.
 *
 * @author Hack23 AB
 * @license Apache-2.0
 */
 
import { escapeHtml } from '../html-utils.js';
import type { Language } from '../types/language.js';
import type { RawDocument, CIAContext } from './types.js';
import {
  L,
  svSpan,
  sanitizeUrl,
  isPersonProfileText,
  cleanMotionText,
  parseMotionAuthorParty,
  extractKeyPassage,
  generateEnhancedSummary,
  normalizePartyKey,
  partyMotionSuccessRate,
} from './helpers.js';
import { detectPolicyDomains, generatePolicySignificance, generateDeepPolicyAnalysis } from './policy-analysis.js';
 
/**
 * Committee codes with known high-influence weighting.
 * calculateInfluenceScore adds 30 points when a document's organ/committee is
 * in this set, versus 15 points for any other non-empty committee value.
 */
const HIGH_INFLUENCE_COMMITTEES = new Set(['FiU', 'KU', 'JuU', 'UU', 'FöU', 'SoU']);
/**
 * Document types that carry higher parliamentary influence.
 * calculateInfluenceScore awards 35 points for 'prop', 30 for 'bet' and 20 for
 * the remaining members of this set ('skr', 'dir').
 */
const HIGH_INFLUENCE_TYPES = new Set(['prop', 'bet', 'skr', 'dir']);
 
/**
 * Calculate an influence score (0-100) for a parliamentary document.
 *
 * The score is the sum of five independent components, capped at 100:
 * - document type: 'prop' 35, 'bet' 30, other high-influence types 20,
 *   'mot' 10, anything else (including a missing type) 15;
 * - committee: 30 for a high-influence committee, 15 for any other non-empty
 *   committee, 0 when no committee is set;
 * - policy domain breadth: 7 per detected domain, at most 20;
 * - content richness: 10 when `fullText` or `fullContent` is present;
 * - government sponsorship: 5 for non-motion documents that either carry no
 *   party or are propositions.
 *
 * @param doc - The document to score
 * @returns Influence score 0-100
 */
export function calculateInfluenceScore(doc: RawDocument): number {
  let score = 0;

  // Document type weighting (propositions > reports > motions)
  const docType = doc.doktyp || doc.documentType || '';
  if (HIGH_INFLUENCE_TYPES.has(docType)) {
    score += docType === 'prop' ? 35 : docType === 'bet' ? 30 : 20;
  } else if (docType === 'mot') {
    score += 10;
  } else {
    score += 15; // unknown type gets moderate weight
  }

  // Committee tier weighting
  const organ = doc.organ || doc.committee || '';
  if (HIGH_INFLUENCE_COMMITTEES.has(organ)) {
    score += 30;
  } else if (organ) {
    score += 15;
  }

  // Policy domain breadth (more domains = broader impact)
  const domains = detectPolicyDomains(doc);
  score += Math.min(20, domains.length * 7);

  // Content richness (full text available indicates substantive document)
  if (doc.fullText || doc.fullContent) {
    score += 10;
  }

  // Party sponsorship (government documents inherently carry more weight).
  // Uses the resolved docType so a proposition identified only through
  // `documentType` is treated the same as one identified through `doktyp`.
  const isGovernment = !doc.parti || docType === 'prop';
  if (isGovernment && docType !== 'mot') {
    score += 5;
  }

  return Math.min(100, score);
}
 
/**
 * Case-insensitive pattern for "med anledning av prop. YYYY/YY:NNN".
 * Capture group 1 holds the proposition ID.
 */
const PROP_REFERENCE_REGEX = /med anledning av prop\.\s+(\d{4}\/\d{2}:\d+)/i;

/**
 * Same prefix as above, but capture group 1 holds whatever text follows the
 * proposition ID (the descriptive part of the title).
 */
export const PROP_TITLE_SUFFIX_REGEX = /med anledning av prop\.\s+\d{4}\/\d{2}:\d+\s*(.*)/i;

/**
 * Pull the parent proposition ID out of a motion title.
 *
 * A motion answering a government proposition is titled along the lines of
 * "med anledning av prop. 2025/26:118 Tillståndsprövning enligt förnybartdirektivet";
 * for that input the result is "2025/26:118".
 *
 * @param title - Motion title to inspect
 * @returns The proposition ID, or null when the title carries no such reference
 */
export function extractPropRef(title: string): string | null {
  const hit = title.match(PROP_REFERENCE_REGEX);
  if (!hit) {
    return null;
  }
  const propId = hit[1];
  return propId ? propId : null;
}
 
/**
 * Partition motions by the government proposition each one answers.
 *
 * The proposition ID is read from the motion title (`titel`, then `title`)
 * via extractPropRef. Motions whose title yields no ID are collected in
 * `independent`; all others are appended, in input order, to the list stored
 * under their proposition ID in `grouped`.
 *
 * @param motions - Motions to partition
 * @returns `grouped` (proposition ID → motions) and `independent` (the rest)
 */
export function groupMotionsByProposition(motions: RawDocument[]): {
  grouped: Map<string, RawDocument[]>;
  independent: RawDocument[];
} {
  const independent: RawDocument[] = [];
  const grouped = new Map<string, RawDocument[]>();

  for (const entry of motions) {
    const propId = extractPropRef(entry.titel || entry.title || '');
    if (!propId) {
      independent.push(entry);
      continue;
    }

    const bucket = grouped.get(propId);
    if (bucket === undefined) {
      grouped.set(propId, [entry]);
    } else {
      bucket.push(entry);
    }
  }

  return { grouped, independent };
}
 
/**
 * Group propositions by their referred committee.
 *
 * The key is `organ` when it is a non-empty string, otherwise `committee`.
 * An empty-string `organ` is treated as missing so it cannot mask a populated
 * `committee` field — the same fallback calculateInfluenceScore applies.
 * Propositions with neither field set are collected under the empty-string key.
 * Map iteration order follows the first appearance of each committee.
 *
 * @param propositions - Propositions to group
 * @returns Map from committee code (or '') to the propositions referred to it
 */
export function groupPropositionsByCommittee(propositions: RawDocument[]): Map<string, RawDocument[]> {
  const map = new Map<string, RawDocument[]>();
  for (const prop of propositions) {
    // `||` rather than `??`: '' means "no committee recorded", not a real key.
    const key = prop.organ || prop.committee || '';
    const bucket = map.get(key);
    if (bucket) {
      bucket.push(prop);
    } else {
      map.set(key, [prop]);
    }
  }
  return map;
}
 
/**
 * Build a one-paragraph HTML summary of which parties filed the most motions.
 *
 * Motions are bucketed by `normalizePartyKey(parti)`; the 'other' bucket is
 * ignored. The party with the most motions is named in bold together with its
 * motion count and up to two policy domains detected across its motions. When
 * a second party exists, a follow-up sentence gives its count as well.
 * Wording comes from the 'partyLeadsOpposition' label when one is defined,
 * otherwise from the inline per-language templates (English as the default).
 *
 * @param motions - Motions to analyse
 * @param lang - Target language code used to select labels and templates
 * @returns An indented `<p>…</p>` line, or '' when no named party filed a motion
 */
export function generateOppositionStrategySection(motions: RawDocument[], lang: Language | string): string {
  // Null-prototype dictionary so a party key such as 'constructor' can never
  // collide with a property inherited from Object.prototype.
  const byParty: Record<string, RawDocument[]> = Object.create(null);
  motions.forEach(m => {
    const party = normalizePartyKey(m.parti);
    const bucket = byParty[party];
    if (bucket) {
      bucket.push(m);
    } else {
      byParty[party] = [m];
    }
  });

  // Most active party first; the sort is stable, so ties keep insertion order.
  const sortedParties = Object.entries(byParty)
    .filter(([p]) => p !== 'other')
    .sort(([, a], [, b]) => b.length - a.length);

  if (sortedParties.length === 0) return '';

  const [topParty, topMotions] = sortedParties[0];

  // Identify primary policy domain(s) for the most-active party
  const topDomainSet = new Set<string>();
  topMotions.forEach(m => {
    detectPolicyDomains(m, lang).forEach(d => topDomainSet.add(d));
  });
  const topDomains = Array.from(topDomainSet).slice(0, 2);

  const count = topMotions.length;
  const safeParty = escapeHtml(topParty);

  // Per-language conjunction for domain list
  const conjunctions: Record<string, string> = {
    sv: ' och ', da: ' og ', no: ' og ', fi: ' ja ', de: ' und ', fr: ' et ',
    es: ' y ', nl: ' en ', ar: ' و', he: ' ו', ja: '・', ko: '·', zh: '和',
  };
  const conjunction = conjunctions[lang as string] ?? ' and ';
  const domainList = topDomains.join(conjunction);

  // Lead sentence: label template with {party}/{count} placeholders if present.
  const leadsVal = L(lang, 'partyLeadsOpposition') as string | undefined;
  let text: string;
  if (typeof leadsVal === 'string' && leadsVal !== '') {
    // Function replacers insert the value literally; a plain string replacement
    // would interpret `$&`, `$1`, … sequences occurring in the party name.
    text = leadsVal
      .replace('{party}', () => `<strong>${safeParty}</strong>`)
      .replace('{count}', () => String(count));
  } else {
    // Fallback inline templates per language
    const templates: Record<string, (p: string, n: number) => string> = {
      sv: (p, n) => `<strong>${p}</strong> är mest aktiv med ${n} motion${n !== 1 ? 'er' : ''}`,
      da: (p, n) => `<strong>${p}</strong> fører med ${n} forslag`,
      no: (p, n) => `<strong>${p}</strong> leder med ${n} forslag`,
      fi: (p, n) => `<strong>${p}</strong> johtaa ${n} aloitteella`,
      de: (p, n) => `<strong>${p}</strong> führt mit ${n} Antrag${n !== 1 ? 'en' : ''}`,
      fr: (p, n) => `<strong>${p}</strong> mène avec ${n} motion${n !== 1 ? 's' : ''}`,
      es: (p, n) => `<strong>${p}</strong> lidera con ${n} mocion${n !== 1 ? 'es' : ''}`,
      nl: (p, n) => `<strong>${p}</strong> leidt met ${n} motie${n !== 1 ? 's' : ''}`,
      ar: (p, n) => `<strong>${p}</strong> يتصدر بـ${n} اقتراح`,
      he: (p, n) => `<strong>${p}</strong> מוביל עם ${n} הצעות`,
      ja: (p, n) => `<strong>${p}</strong>が${n}件の動議で最も活発`,
      ko: (p, n) => `<strong>${p}</strong>이(가) ${n}건의 동의로 선두`,
      zh: (p, n) => `<strong>${p}</strong>以${n}项动议领先`,
    };
    const tpl = templates[lang as string];
    text = tpl ? tpl(safeParty, count) : `<strong>${safeParty}</strong> leads opposition activity with ${count} motion${count !== 1 ? 's' : ''}`;
  }

  // Append the policy focus only when at least one domain was detected.
  if (domainList) {
    const focusTemplates: Record<string, string> = {
      sv: ', med fokus på ', da: ', med fokus på ', no: ', med fokus på ',
      fi: ', painopisteenä ', de: ', mit Fokus auf ', fr: ', axé sur ',
      es: ', centrado en ', nl: ', gericht op ', ar: '، بالتركيز على ',
      he: ', בדגש על ', ja: '、', ko: ', ', zh: ',重点关注',
    };
    const focusPrefix = focusTemplates[lang as string] ?? ', focused on ';
    text += `${focusPrefix}${escapeHtml(domainList)}`;
  }
  text += '.';

  // Runner-up sentence, present only when a second named party exists.
  if (sortedParties.length > 1) {
    const [secondParty, secondMotions] = sortedParties[1];
    const n = secondMotions.length;
    const safeSecond = escapeHtml(secondParty);
    const followTemplates: Record<string, (p: string, n: number) => string> = {
      sv: (p, c) => ` ${p} följer med ${c} motion${c !== 1 ? 'er' : ''}.`,
      da: (p, c) => ` ${p} følger med ${c} forslag.`,
      no: (p, c) => ` ${p} følger med ${c} forslag.`,
      fi: (p, c) => ` ${p} seuraa ${c} aloitteella.`,
      de: (p, c) => ` ${p} folgt mit ${c} Antrag${c !== 1 ? 'en' : ''}.`,
      fr: (p, c) => ` ${p} suit avec ${c} motion${c !== 1 ? 's' : ''}.`,
      es: (p, c) => ` ${p} sigue con ${c} mocion${c !== 1 ? 'es' : ''}.`,
      nl: (p, c) => ` ${p} volgt met ${c} motie${c !== 1 ? 's' : ''}.`,
      ar: (p, c) => ` ${p} يتبع بـ${c} اقتراح.`,
      he: (p, c) => ` ${p} עוקב עם ${c} הצעות.`,
      ja: (p, c) => ` ${p}が${c}件で続きます。`,
      ko: (p, c) => ` ${p}이(가) ${c}건으로 뒤를 잇습니다.`,
      zh: (p, c) => ` ${p}以${c}项紧随其后。`,
    };
    const followTpl = followTemplates[lang as string];
    text += followTpl
      ? followTpl(safeSecond, n)
      : ` ${safeSecond} follows with ${n} motion${n !== 1 ? 's' : ''}.`;
  }

  return `    <p>${text}</p>\n`;
}
 
/**
 * Render a single motion entry div (shared between flat list and themed sections).
 *
 * The entry contains the title, a "filed by" line (author plus party when
 * known), a summary, a "why it matters" analysis and a link to the motion.
 * Title and summary are wrapped with svSpan when the content appears to come
 * from a Swedish source. All label and document text is HTML-escaped here; the
 * output of generateDeepPolicyAnalysis and sanitizeUrl is inserted as returned.
 * NOTE(review): sanitizeUrl's result goes into an href attribute unescaped —
 * confirm it yields attribute-safe output.
 *
 * @param motion - The motion document to render
 * @param lang - Target language code used for labels and language spans
 * @returns HTML fragment for one `.motion-entry` block
 */
export function renderMotionEntry(motion: RawDocument, lang: Language | string): string {
  const titleText = motion.titel || motion.title || '';
  const escapedTitle = escapeHtml(titleText);
  // Only the Swedish `titel` being present (no translated `title`) marks Swedish text.
  const titleHtml = (motion.titel && !motion.title)
    ? svSpan(escapedTitle, lang)
    : escapedTitle;
  const docName = escapeHtml(motion.dokumentnamn || motion.dok_id || titleText);

  // Use enriched author and party data, with fallback parsing from raw notis.
  // Treat 'Unknown' sentinel (set by enrichDocumentsWithContent) as missing so
  // we attempt parseMotionAuthorParty before giving up.
  const unknownVal = L(lang, 'unknown');
  let authorName = (motion.intressent_namn !== 'Unknown' ? motion.intressent_namn : null)
                || (motion.author !== 'Unknown' ? motion.author : null)
                || '';
  let partyName = (motion.parti !== 'Unknown' ? motion.parti : '') || '';
  // Fire fallback when EITHER author or party is missing — covers the party-only sentinel case
  // where intressent_namn is valid but parti was 'Unknown' and stripped to ''.
  if (!authorName || !partyName) {
    const rawText = motion.undertitel || motion.summary || motion.notis || motion.fullText || motion.titel || motion.rubrik || '';
    const parsed = parseMotionAuthorParty(rawText);
    if (parsed) {
      // Parsed values only fill gaps; enriched fields always win.
      if (parsed.author && !authorName) authorName = parsed.author;
      if (parsed.party && !partyName) partyName = parsed.party;
    }
  }
  if (!authorName) authorName = typeof unknownVal === 'string' ? unknownVal : 'Unknown';
  const authorLine = partyName
    ? `${escapeHtml(authorName)} (${escapeHtml(partyName)})`
    : escapeHtml(authorName);

  // Use enhanced summary based on metadata (cleanMotionText strips Swedish boilerplate)
  const summaryText = generateEnhancedSummary(motion, 'motion', lang);
  const motionDefaultVal = L(lang, 'motionDefault');
  // Only wrap in Swedish-language span when the content comes from a Swedish source
  const isSwedishContent = (motion.titel && !motion.title)
    || (motion.summary || motion.notis || '').includes('Motion till riksdagen');
  const summaryHtml = (summaryText && summaryText !== motionDefaultVal && isSwedishContent)
    ? svSpan(escapeHtml(summaryText), lang)
    : escapeHtml(summaryText || (typeof motionDefaultVal === 'string' ? motionDefaultVal : ''));

  // Labels are escaped uniformly, matching the other label interpolations below.
  const filedByVal = L(lang, 'filedBy');
  const readFullVal = L(lang, 'readFullMotion');
  const whyItMattersVal = L(lang, 'whyItMatters');

  return `
    <div class="motion-entry">
      <h3>${titleHtml}</h3>
      <p><strong>${escapeHtml(String(filedByVal))}:</strong> ${authorLine}</p>
      <p>${summaryHtml}</p>
      <p><strong>${escapeHtml(String(whyItMattersVal))}:</strong> ${generateDeepPolicyAnalysis(motion, lang, 'mot')}</p>
      <p><a href="${sanitizeUrl(motion.url)}" class="document-link" rel="noopener noreferrer">${escapeHtml(String(readFullVal))}: ${docName}</a></p>
    </div>
`;
}
 
/**
 * Generate per-document analysis.
 * PRIMARY: full document text, policy significance, related speeches.
 * SECONDARY (only when genuinely informative): CIA historical context footnote
 * for motions, and an early-warning note driven by coalition stability data.
 *
 * @param doc - The document to analyse
 * @param docType - Document type; the short codes 'prop', 'bet' and 'mot' are
 *   mapped to 'proposition', 'report' and 'motion' for summary generation,
 *   any other value is passed through unchanged
 * @param cia - Optional CIA context; when undefined both secondary notes are skipped
 * @param lang - Target language code used for labels and language spans
 * @returns HTML fragments joined by single spaces
 */
export function generateDocumentIntelligenceAnalysis(doc: RawDocument, docType: string, cia: CIAContext | undefined, lang: Language | string): string {
  const parts: string[] = [];

  // Normalise short doktyp codes to the names used by generateEnhancedSummary
  const normalizedType = docType === 'prop' ? 'proposition'
    : docType === 'bet' ? 'report'
    : docType === 'mot' ? 'motion'
    : docType;

  // ── PRIMARY: full document text or best available summary ────────────────
  const rawText = doc.fullText || doc.fullContent || doc.summary || doc.notis || '';
  // Discard person-profile data (MP status lines, deceased notices) — these are
  // not document content and must never appear in article document entries.
  const safeRawText = isPersonProfileText(rawText) ? '' : rawText;
  // For motions, clean Swedish boilerplate before extracting passage
  const cleanedText = (normalizedType === 'motion' && safeRawText.includes('Motion till riksdagen'))
    ? cleanMotionText(safeRawText)
    : safeRawText;
  const passage = extractKeyPassage(cleanedText, 500);
  if (passage) {
    const isSwedishSource = !!(doc.titel && !doc.title);
    parts.push(isSwedishSource
      ? svSpan(escapeHtml(passage), lang)
      : escapeHtml(passage));
  } else {
    // No usable passage: fall back to the metadata-derived summary.
    parts.push(escapeHtml(generateEnhancedSummary(doc, normalizedType, lang)));
  }

  // ── PRIMARY: policy domain significance derived from document content ────
  const significance = generatePolicySignificance(doc, lang, docType);
  parts.push(`<strong>${escapeHtml(String(L(lang, 'whatThisMeans')))}:</strong> ${significance}`);

  // ── PRIMARY: related speeches (direct evidence from the chamber) ─────────
  const speeches = doc.speeches || [];
  if (speeches.length > 0) {
    // At most two speakers are listed, each as "name (party)".
    const speakerLines = speeches.slice(0, 2).map(s => {
      const who = [s.talare, s.parti ? `(${s.parti})` : ''].filter(Boolean).join(' ');
      return who ? escapeHtml(who) : 'Unknown speaker';
    }).join(', ');
    parts.push(`<em>Debate contributions from: ${speakerLines}.</em>`);
  }

  // ── SECONDARY: CIA historical context — only where it adds real perspective
  // For motions: historical passage rate is highly relevant context since
  // almost all opposition motions are denied (~99%). Only show when we have
  // an actual party-specific rate, so the note is concrete, not generic.
  if (docType === 'mot' && cia) {
    // Try to get party from doc fields, else parse from raw text. The 'Unknown'
    // sentinel is treated as missing, as renderMotionEntry does, so the
    // fallback parse still gets a chance to recover the real party.
    let party = doc.parti !== 'Unknown' ? doc.parti : '';
    if (!party) {
      const rawText2 = doc.summary || doc.notis || doc.fullText || '';
      const parsed2 = parseMotionAuthorParty(rawText2);
      if (parsed2) party = parsed2.party;
    }
    const rate = partyMotionSuccessRate(party, cia);
    // typeof guard: only a real number may reach toFixed.
    if (typeof rate === 'number' && party) {
      parts.push(
        `<small class="cia-context">Historical context: ${escapeHtml(party)} motions have a ${escapeHtml(rate.toFixed(1))}% passage rate ` +
        `(${escapeHtml(String(cia.overallMotionDenialRate))}% of all opposition motions are rejected). ` +
        `This motion signals a policy position rather than an imminent legislative change.</small>`
      );
    }
  }

  // ── EARLY WARNING: high-influence documents with stability risk indicators ─
  if (cia) {
    const influenceScore = calculateInfluenceScore(doc);
    const stabilityScore = cia.coalitionStability.stabilityScore;
    const majorityMargin = cia.coalitionStability.majorityMargin;

    // Flag high-influence documents during coalition instability; otherwise
    // flag propositions and committee reports when the majority is 2 seats or fewer.
    if (influenceScore >= 60 && stabilityScore < 50) {
      parts.push(
        `<small class="early-warning">⚠ Early warning: This high-influence document (score: ${escapeHtml(String(influenceScore))}) ` +
        `arrives during a period of coalition instability (stability: ${escapeHtml(String(stabilityScore))}). ` +
        `Monitor closely for defections or procedural delays.</small>`
      );
    } else if (majorityMargin <= 2 && (docType === 'prop' || docType === 'bet')) {
      parts.push(
        `<small class="early-warning">⚠ Thin majority alert: With only ${escapeHtml(String(majorityMargin))} seat majority, ` +
        `this ${docType === 'prop' ? 'government bill' : 'committee report'} faces elevated defeat risk.</small>`
      );
    }
  }

  // For propositions: coalition note is already in the article-level summary; skip per-document repetition.
  // (Moved to generateGenericContent key-takeaways section.)

  return parts.join(' ');
}