/**
* @module news-types/weekly-review/data-loader
* @description Data loading and processing utilities for weekly-review articles.
* Handles CIA context loading, CSV parsing, document full-text enrichment,
* and speech attachment.
*
* @author Hack23 AB
* @license Apache-2.0
*/
import { readFileSync, existsSync } from 'fs';
import { join, dirname } from 'path';
import { fileURLToPath } from 'url';
import Papa from 'papaparse';
import { MCPClient } from '../../mcp-client.js';
import {
isPersonProfileText,
type RawDocument,
type CIAContext,
} from '../../data-transformers.js';
import type { MCPCallRecord } from '../../types/article.js';
/**
 * Abbreviations of the parties currently seated in the Riksdag
 * (2022 election onwards). Used as an allow-list when reading party rows.
 */
const RIKSDAG_PARTIES = new Set<string>([
  'M',
  'SD',
  'KD',
  'L',
  'C',
  'S',
  'V',
  'MP',
]);
/**
 * Format a date as the calendar-date portion of its ISO timestamp,
 * i.e. everything before the `T` in `date.toISOString()` (UTC based).
 *
 * @param date - Date to format; defaults to the current time.
 * @returns The date part of the ISO string, e.g. `2024-03-05`.
 */
export function formatDateForSlug(date: Date = new Date()): string {
  const [datePart = ''] = date.toISOString().split('T');
  return datePart;
}
/**
 * Locate the repository's `data` directory.
 *
 * The primary strategy walks three directory levels up from this module's
 * own location (scripts/news-types/weekly-review/ → repo root). If the module
 * URL cannot be turned into a file path, the current working directory is
 * used as the base instead.
 *
 * @returns Path to the `data` directory.
 */
export function repoDataDir(): string {
  let baseDir: string;
  try {
    const moduleDir = dirname(fileURLToPath(import.meta.url));
    baseDir = join(moduleDir, '..', '..', '..');
  } catch {
    baseDir = process.cwd();
  }
  return join(baseDir, 'data');
}
/**
 * Locate the `cia-data` directory that sits at the repository root.
 *
 * Resolution is relative to this module (three levels up from
 * scripts/news-types/weekly-review/); when the module location cannot be
 * determined the current working directory is used as the root.
 *
 * @returns Path to the `cia-data` directory.
 */
function resolveCIADataDir(): string {
  const leaf = 'cia-data';
  try {
    const moduleDir = dirname(fileURLToPath(import.meta.url));
    const toRepoRoot = ['..', '..', '..'];
    return join(moduleDir, ...toRepoRoot, leaf);
  } catch {
    return join(process.cwd(), leaf);
  }
}
/**
 * Parse a CSV file into an array of row objects keyed by header names.
 * Uses PapaParse for correct RFC 4180 handling (escaped quotes, embedded
 * commas, multi-line fields).
 *
 * Never throws:
 *  - a missing file logs a warning and yields `[]`;
 *  - parser-reported row errors are logged as warnings, and whatever rows
 *    PapaParse produced are still returned;
 *  - any exception while reading or parsing is logged and yields `[]`.
 *
 * @param filePath - Path of the CSV file to read (decoded as UTF-8).
 * @returns One record per non-empty data row, keyed by the header line.
 */
function parseCsvFile(filePath: string): Array<Record<string, string>> {
  if (!existsSync(filePath)) {
    console.warn(`CIA data file not found: ${filePath}`);
    return [];
  }
  try {
    const text = readFileSync(filePath, 'utf-8');
    const result = Papa.parse<Record<string, string>>(text, {
      header: true,
      skipEmptyLines: true,
    });
    if (result.errors.length > 0) {
      // Non-fatal: keep the rows that did parse, but surface the problems.
      console.warn(`CSV parsing warnings for ${filePath}:`, result.errors);
    }
    return result.data;
  } catch (err) {
    console.error(`Failed to parse CSV ${filePath}:`, err);
    return [];
  }
}
// RIKSDAG_PARTIES is defined at the top of this file
/**
 * Load CIA intelligence context from real CSV files in cia-data/.
 * Sources:
 * • cia-data/party/view_party_performance_metrics_sample.csv – win rates, documents, rebel rate
 * • cia-data/view_riksdagen_party_summary_sample.csv – current seat counts
 * • cia-data/party/distribution_coalition_alignment.csv – inter-party alignment
 * • cia-data/view_riksdagen_committee_decisions.csv – committee decision outcomes
 *
 * Missing or unreadable files are tolerated: each one simply contributes no
 * rows, so the result degrades to defaults (no parties, stability 75,
 * denial rate 96) rather than throwing.
 *
 * @returns A CIAContext with party performance, coalition stability,
 *          voting patterns and the overall motion denial rate.
 */
export function loadCIAContext(): CIAContext {
  const ciaDir = resolveCIADataDir();

  // Lenient numeric readers: a missing or unparseable cell (and NaN) becomes 0.
  const toInt = (cell: string | undefined): number => parseInt(cell ?? '0', 10) || 0;
  const toFloat = (cell: string | undefined): number => parseFloat(cell ?? '0') || 0;

  // ── 1. Seat counts from view_riksdagen_party_summary_sample.csv ──────────
  const seatMap = new Map<string, number>();
  const partySummaryRows = parseCsvFile(join(ciaDir, 'view_riksdagen_party_summary_sample.csv'));
  for (const row of partySummaryRows) {
    const party = row['party']?.trim();
    if (party) seatMap.set(party, toInt(row['total_active_parliament']));
  }

  // ── 2. Party performance from view_party_performance_metrics_sample.csv ──
  const partyPerformance: CIAContext['partyPerformance'] = [];
  const partyMetricsRows = parseCsvFile(join(ciaDir, 'party', 'view_party_performance_metrics_sample.csv'));
  for (const row of partyMetricsRows) {
    const id = row['party']?.trim() ?? '';
    // Only parties in the RIKSDAG_PARTIES allow-list are reported.
    if (!RIKSDAG_PARTIES.has(id)) continue;
    const avgWinRate = toFloat(row['avg_win_rate']);
    const avgRebelRate = toFloat(row['avg_rebel_rate']);
    const docsLastYear = toInt(row['documents_last_year']);
    const ministers = toInt(row['current_ministers']);
    const perfLevel = row['performance_level']?.trim() ?? '';
    partyPerformance.push({
      id,
      partyName: row['party_name']?.trim() ?? id,
      metrics: {
        seats: seatMap.get(id) ?? 0,
        // avg_win_rate is 0-100 percentage (e.g. M=86.49, S=43.40)
        successRate: avgWinRate,
        motionsSubmitted: docsLastYear,
        motionsPassed: Math.round(avgWinRate * docsLastYear / 100),
        // avg_rebel_rate is a 0-1 decimal ratio (e.g. S=0.06 → 6% rebel rate)
        cohesionScore: Math.round((1 - avgRebelRate) * 100),
      },
      trends: {
        supportTrend: ministers > 0 ? 'stable' : (avgWinRate < 50 ? 'declining' : 'stable'),
        activityTrend: perfLevel === 'EXCELLENT' ? 'increasing' : perfLevel === 'BELOW_AVERAGE' ? 'declining' : 'stable',
      },
    });
  }

  // ── 3. Coalition stability from distribution_coalition_alignment.csv ─────
  const coalignRows = parseCsvFile(join(ciaDir, 'party', 'distribution_coalition_alignment.csv'));
  // Government bloc: M + KD + L + SD (SD provides confidence-and-supply support)
  const GOV_PARTIES = new Set(['M', 'KD', 'L', 'SD']);
  const govSeats = partyPerformance
    .filter(p => GOV_PARTIES.has(p.id))
    .reduce((s, p) => s + p.metrics.seats, 0);
  const totalSeats = 349;
  const majorityNeeded = Math.floor(totalSeats / 2) + 1; // 175
  const majorityMargin = govSeats - majorityNeeded;

  // Average alignment among the three formal government parties (M, KD, L);
  // both orderings of each pair are accepted.
  const coreGovPairs = new Set(['M-KD', 'M-L', 'KD-L', 'KD-M', 'L-M', 'L-KD']);
  let alignmentSum = 0;
  let alignmentCount = 0;
  for (const row of coalignRows) {
    const pair = `${row['party1']?.trim() ?? ''}-${row['party2']?.trim() ?? ''}`;
    if (coreGovPairs.has(pair)) {
      alignmentSum += toFloat(row['alignment_rate']);
      alignmentCount++;
    }
  }
  // Falls back to 75 when no core-government pair is present in the data.
  const stabilityScore = alignmentCount > 0
    ? Math.round((alignmentSum / alignmentCount) * 100)
    : 75;
  const riskLevel = majorityMargin <= 0 ? 'high' : majorityMargin <= 2 ? 'moderate' : 'low';
  const coalitionStability: CIAContext['coalitionStability'] = {
    stabilityScore,
    riskLevel,
    // Base 20% defection probability, reduced 3% per seat of margin, minimum 5%.
    // Capped at 100%: a large negative margin (e.g. seat data missing, margin
    // -175) would otherwise produce a nonsensical value such as 545.
    defectionProbability: Math.min(100, Math.max(5, Math.round(20 - majorityMargin * 3))),
    majorityMargin: Math.max(0, majorityMargin),
  };

  // ── 4. Voting patterns from coalition alignment (top 5 party pairs) ───────
  const votingPatterns: CIAContext['votingPatterns'] = {
    keyIssues: coalignRows.slice(0, 5).map(row => {
      const alignment = toFloat(row['alignment_rate']);
      return {
        topic: `${row['party1']?.trim() ?? ''}-${row['party2']?.trim() ?? ''} alignment`,
        coalitionAlignment: Math.round(alignment * 100),
        oppositionAlignment: Math.round((1 - alignment) * 100),
        crossPartyVotes: toInt(row['aligned_votes']),
      };
    }),
  };

  // ── 5. Motion denial rate from committee decision outcomes ────────────────
  let overallMotionDenialRate = 96; // historical baseline from CIA data
  const decisionsRows = parseCsvFile(join(ciaDir, 'view_riksdagen_committee_decisions.csv'));
  if (decisionsRows.length > 0) {
    // Share of decisions whose `winner` column is "utskottet" (case-insensitive).
    const committeeWins = decisionsRows.filter(r =>
      r['winner']?.trim().toLowerCase() === 'utskottet'
    ).length;
    overallMotionDenialRate = Math.round((committeeWins / decisionsRows.length) * 100);
  }

  console.log(
    ` 📊 CIA CSV context: ${partyPerformance.length} parties, ` +
    `gov seats ${govSeats}/${totalSeats} (margin ${majorityMargin}), ` +
    `stability ${stabilityScore}/100, denial rate ${overallMotionDenialRate}%`
  );
  return { partyPerformance, coalitionStability, votingPatterns, overallMotionDenialRate };
}
/**
 * Enrich a flat list of documents with full text via get_dokument_innehall.
 * Mutates each document in place; never throws — failures are logged and skipped.
 *
 * Documents are processed in consecutive batches of `concurrency` items, with a
 * 300 ms pause between batches. Every successful fetch is also appended to
 * `mcpCalls`.
 *
 * @param client - MCP client used to fetch document details.
 * @param documents - Documents to enrich; each is identified by `dok_id`,
 *   falling back to `dokumentnamn`, then `id`. Documents without any of
 *   these are skipped.
 * @param mcpCalls - Call log that receives one record per successful fetch.
 * @param concurrency - Number of documents fetched in parallel per batch.
 *   Values below 1 (or NaN) are treated as 1 so the batch loop always advances.
 */
export async function enrichWithFullText(
  client: MCPClient,
  documents: RawDocument[],
  mcpCalls: MCPCallRecord[],
  concurrency = 3,
): Promise<void> {
  // A step of 0 (or a negative step) would never advance the loop below.
  const batchSize = concurrency >= 1 ? Math.floor(concurrency) : 1;
  console.log(` 📖 Enriching ${documents.length} documents with full text (concurrency ${batchSize})...`);

  // Some documents return politician profile text (MP status like
  // "Tjänstgörande riksdagsledamot..." or "Avliden YYYY-MM-DD...") in their
  // notis/summary/fullText fields — discard these to prevent them from
  // appearing as article content. Non-string values are discarded as well.
  const sanitize = (value: unknown): string =>
    typeof value === 'string' && !isPersonProfileText(value) ? value : '';

  let enriched = 0;
  for (let i = 0; i < documents.length; i += batchSize) {
    const batch = documents.slice(i, i + batchSize);
    await Promise.allSettled(batch.map(async (doc) => {
      const dokId = (doc as Record<string, string>).dok_id
        ?? (doc as Record<string, string>).dokumentnamn
        ?? (doc as Record<string, string>).id;
      if (!dokId) return;
      try {
        const details = await client.fetchDocumentDetails(dokId, true);
        mcpCalls.push({ tool: 'get_dokument_innehall', result: details });
        // Merge full text fields into document.
        // NOTE: details['text'] from get_dokument_innehall is a raw database metadata
        // dump (IDs, dates, URLs), NOT human-readable prose — do not use as fullText.
        const d = doc as Record<string, unknown>;
        d['fullText'] = sanitize(details['fullText'])
          || sanitize(details['summary'])
          || sanitize(details['notis'])
          || '';
        d['fullContent'] = (details['html'] as string) ?? '';
        // Only fill summary/notis when the document does not already have one.
        if (!d['summary'] && details['summary']) d['summary'] = sanitize(details['summary']);
        if (!d['notis'] && details['notis']) d['notis'] = sanitize(details['notis']);
        d['contentFetched'] = true;
        enriched++;
      } catch (err: unknown) {
        const reason = err instanceof Error ? err.message : String(err);
        console.error(` ⚠ Failed to fetch full text for ${dokId}:`, reason);
      }
    }));
    // Small delay between batches to avoid rate limiting
    if (i + batchSize < documents.length) {
      await new Promise<void>(r => setTimeout(r, 300));
    }
  }
  console.log(` ✅ Enriched ${enriched}/${documents.length} documents with full text`);
}
/**
 * Attach related speeches to documents that share the same dokId.
 *
 * Each speech is indexed under every identifier it carries (`intressent_id`,
 * `dok_id`, `rel_dok_id`); a document then receives, as a `speeches` array,
 * all speeches indexed under its own `dok_id`. Documents without a match are
 * left untouched. Speech text is truncated to 300 characters.
 *
 * @param documents - Documents to annotate (mutated in place).
 * @param speeches - Raw speech records.
 */
export function attachSpeechesToDocuments(
  documents: RawDocument[],
  speeches: Array<Record<string, unknown>>,
): void {
  if (speeches.length === 0) return;

  type SpeechSummary = { talare?: string; parti?: string; text?: string; anforande_nummer?: string };

  // Index a speech under ALL of its ids rather than only the first one present:
  // picking just the first meant a speech carrying `intressent_id` was never
  // reachable through its `dok_id` / `rel_dok_id`, which is what documents are
  // looked up by.
  // NOTE(review): `intressent_id` looks like a speaker id rather than a
  // document id — kept as a key for backward compatibility; confirm.
  const idFields = ['intressent_id', 'dok_id', 'rel_dok_id'] as const;

  // Build a loose index: id → speeches
  const speechIndex = new Map<string, SpeechSummary[]>();
  for (const s of speeches) {
    const ids = new Set<string>();
    for (const field of idFields) {
      const raw = s[field];
      if ((typeof raw === 'string' || typeof raw === 'number') && String(raw) !== '') {
        ids.add(String(raw));
      }
    }
    if (ids.size === 0) continue;

    const rawText = s['anforande_text'];
    const summary: SpeechSummary = {
      talare: s['talare'] as string | undefined,
      parti: s['parti'] as string | undefined,
      text: typeof rawText === 'string' ? rawText.slice(0, 300) : undefined,
      anforande_nummer: s['anforande_nummer'] as string | undefined,
    };
    for (const id of ids) {
      const bucket = speechIndex.get(id);
      if (bucket) bucket.push(summary);
      else speechIndex.set(id, [summary]);
    }
  }

  for (const doc of documents) {
    const dokId = (doc as Record<string, string>).dok_id ?? '';
    const related = speechIndex.get(dokId);
    if (related && related.length > 0) {
      (doc as Record<string, unknown>).speeches = related;
    }
  }
}
/**
 * Normalize CIAContext so defectionProbability is in [0, 1].
 *
 * risk-analysis.ts multiplies it by 100, so out-of-range values can
 * explode scores. Expected input formats:
 * - (0, 1] — already a proper probability fraction; kept as-is.
 *   Note: exactly 1.0 is treated as 100% (not as 1% whole-percent).
 * - (1, ∞) — treated as a whole-percent (loadCIAContext returns min 5,
 *   e.g. 50 means 50% → normalized to 0.5); clamped to 1.
 * - Non-finite or ≤ 0 — coerced to 0 (no defection risk).
 *
 * The input is never mutated. When nothing needs to change (no numeric
 * defectionProbability, or it is already normalized) the same `ctx` object
 * is returned; otherwise a shallow copy with the adjusted value is returned.
 *
 * @param ctx - Context to normalize.
 * @returns `ctx` itself or a shallow copy with a normalized probability.
 */
export function normalizedCIAContext(ctx: CIAContext): CIAContext {
  const stability = ctx.coalitionStability;
  const defProb = stability?.defectionProbability;
  if (stability == null || typeof defProb !== 'number') return ctx;

  let normalized: number;
  if (!Number.isFinite(defProb) || defProb <= 0) {
    // Non-finite or non-positive: no defection risk.
    normalized = 0;
  } else if (defProb <= 1) {
    // Already a fraction in (0, 1]: keep as-is (1.0 = 100% probability).
    normalized = defProb;
  } else {
    // Whole-percent value (e.g. loadCIAContext min 5): convert to fraction and clamp.
    normalized = Math.min(1, defProb / 100);
  }

  // Avoid allocating a copy when the value is already in range.
  if (normalized === defProb) return ctx;
  return {
    ...ctx,
    coalitionStability: {
      ...stability,
      defectionProbability: normalized,
    },
  };
}
|