All files / scripts bake-stats-html.ts

52.43% Statements 43/82
32.5% Branches 13/40
71.42% Functions 5/7
52.77% Lines 38/72

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229                                                                          1x             1x                                                                               1x 1x 6x 1x 1x 7x 7x 7x 7x 7x 42x   7x   1x                       1x 1x 1x 7x 6x 6x 6x   1x 1x 21x 21x 6x     1x                                     3x 3x 3x 5x 5x 3x 3x   3x                                                                                                         1x 1x                                            
/**
 * @module Build/BakeStatsHtml
 * @category Build / Performance Optimization
 *
 * @description
 * Replaces every `<span ... data-stat-id="stat-…">PLACEHOLDER</span>` in
 * the targeted HTML files (default: `dist/index*.html`) with the real
 * value parsed from `cia-data/extraction_summary_report.csv`.
 *
 * Eliminates the runtime CSV fetch + DOM-rewrite that the browser
 * stats-loader (`src/browser/dashboards/stats-loader.ts`) otherwise
 * performs on every navigation to a start page. The stat values shift
 * on a multi-day cadence (the CSV is re-pushed by an upstream CIA
 * pipeline), so baking at deploy time is correct: every deploy carries
 * the same values the runtime fetch would have produced, but without
 * the network round-trip, PapaParse path, or DOM mutation cost.
 *
 * Rationale (perf):
 *   - The start pages otherwise pay ≈ 15 KiB CSV + parse + N DOM writes
 *     just to render numbers that are already known at build time.
 *   - Removing this eager work shaves TBT and lets the browser focus
 *     on FCP/LCP rendering of the hero banner.
 *
 * The mapping (`STAT_MAPPINGS`) MUST stay in sync with the browser
 * stats-loader so that pages that are NOT baked (e.g. `dashboards/*`)
 * still display identical values through the runtime path. The test
 * `tests/bake-stats-html.test.ts` enforces parity by importing both
 * symbol tables and asserting equality.
 *
 * @author Hack23 AB
 * @license Apache-2.0
 */
 
import fs from 'node:fs';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
 
// Absolute filesystem path of this module, derived from `import.meta.url`.
// The CLI guard at the bottom of the file compares it with `process.argv[1]`
// to decide whether the script was invoked directly.
const __filename = fileURLToPath(import.meta.url);
 
/**
 * Keep this identical to `STAT_MAPPINGS` in
 * `src/browser/dashboards/stats-loader.ts`. The test
 * `tests/bake-stats-html.test.ts` enforces parity.
 *
 * Keys are the `data-stat-id` attribute values looked up by `bakeHtml`;
 * values are the `object_name` entries of the extraction-summary CSV that
 * `buildStatLookup` reads the row count from.
 *
 * NOTE(review): several object names contain the spelling `goverment`.
 * Presumably this mirrors the upstream view names — confirm against the
 * CSV before "correcting" it, since a changed string would stop matching.
 */
export const STAT_MAPPINGS: Readonly<Record<string, string>> = {
  // Hero stats
  'stat-historical-persons': 'person_data',
  'stat-total-votes': 'view_riksdagen_vote_data_ballot_politician_summary',
  'stat-total-documents': 'document_data',
  'stat-rule-violations': 'rule_violation',
  'stat-government-proposals': 'view_riksdagen_goverment_proposals',
  'stat-committee-decisions': 'view_riksdagen_committee_decisions',

  // Intelligence section stats
  'stat-committee-documents': 'view_riksdagen_committee_decision_type_summary',
  'stat-document-activities': 'view_riksdagen_document_type_daily_summary',
  'stat-riksdag-parties': 'view_riksdagen_party',
  'stat-against-proposals': 'view_riksdagen_vote_data_ballot_summary',
  'stat-committee-proposals':
    'view_riksdagen_committee_decision_type_org_summary',
  'stat-government-roles': 'view_riksdagen_goverment_roles',
  'stat-government-role-members': 'view_riksdagen_goverment_role_member',
  'stat-member-proposals': 'view_riksdagen_person_signed_document_summary',
  'stat-committee-role-members': 'view_riksdagen_committee_role_member',
  'stat-party-members': 'view_riksdagen_party_member',
  'stat-party-summary': 'view_riksdagen_party_summary',
  'stat-ballot-summaries': 'view_riksdagen_vote_data_ballot_party_summary',
  'stat-political-parties': 'sweden_political_party',
  'stat-assignments': 'assignment_data',
  'stat-document-attachments': 'document_attachment',
};
 
/**
 * One data row of the extraction-summary CSV as produced by `parseCsv`.
 * Only the columns this script reads are declared; every cell is kept as
 * its trimmed raw text.
 */
interface ExtractionRow {
  /** Name of the extracted object; matched against the values of `STAT_MAPPINGS`. */
  object_name: string;
  /** Extraction outcome; `buildStatLookup` only uses rows equal to `'success'`. */
  status: string;
  /** Row count as text; converted with `Number()` in `buildStatLookup`. */
  row_count: string;
}
 
/**
 * Tiny CSV parser scoped to the extraction-summary file. We do not
 * import PapaParse here because the file has no quoted fields, and
 * this keeps the script with zero runtime dependencies (besides Node).
 *
 * The first line is treated as the header row. Every following line that
 * contains something other than whitespace becomes one record keyed by
 * the trimmed header names; cells are trimmed, and cells missing from a
 * short line are filled with `''`. Quoted fields and embedded commas are
 * NOT supported.
 *
 * @param text - Raw CSV text (LF or CRLF line endings).
 * @returns One record per non-blank data line; `[]` when there are none.
 */
function parseCsv(text: string): ExtractionRow[] {
  // `split` always yields at least one element, so the header line needs a
  // default only to satisfy the type checker, never at runtime.
  const [headerLine = '', ...dataLines] = text.split(/\r?\n/);
  const headers = headerLine.split(',').map((h) => h.trim());
  const out: ExtractionRow[] = [];
  for (const raw of dataLines) {
    // Skip blank AND whitespace-only lines; the latter would otherwise
    // produce a phantom record whose fields are all empty strings.
    if (raw.trim() === '') continue;
    const cells = raw.split(',');
    const row: Record<string, string> = {};
    headers.forEach((header, j) => {
      row[header] = (cells[j] ?? '').trim();
    });
    // The record shape is dictated by the CSV header, not by the compiler;
    // callers must tolerate a column being absent.
    out.push(row as unknown as ExtractionRow);
  }
  return out;
}
 
/**
 * Build the `stat-id → display-string` lookup from the CSV text.
 *
 * Only rows whose `status` is exactly `'success'` and whose `row_count`
 * is a non-empty, finite number are used; when an `object_name` occurs
 * more than once the last such row wins. Stat ids from `STAT_MAPPINGS`
 * without a usable row are omitted from the result.
 *
 * Numbers are formatted with `en-US` thousands separators. This is
 * intended to match what the browser stats-loader renders via
 * `Number.toLocaleString()` on the index pages (that loader is not part
 * of this file — verify there if the formatting is changed).
 *
 * @param csvText - Raw contents of the extraction-summary CSV.
 * @returns Map from stat id to its formatted display value.
 */
export function buildStatLookup(csvText: string): Record<string, string> {
  const byObject = new Map<string, number>();
  for (const row of parseCsv(csvText)) {
    if (row.status !== 'success') continue;
    // `Number('')` is 0, so an empty or missing count has to be rejected
    // explicitly or it would be baked into the page as a literal "0".
    if (!row.row_count) continue;
    const n = Number(row.row_count);
    if (!Number.isFinite(n)) continue;
    byObject.set(row.object_name, n);
  }
  const out: Record<string, string> = {};
  for (const [statId, objectName] of Object.entries(STAT_MAPPINGS)) {
    const value = byObject.get(objectName);
    if (value !== undefined) {
      out[statId] = value.toLocaleString('en-US');
    }
  }
  return out;
}
 
/**
 * Substitute the text of every `<span … data-stat-id="X">…</span>` for
 * which `lookup` holds a value under `X`. Spans whose id is not in
 * `lookup` are returned untouched, so their placeholders remain in the
 * markup.
 *
 * Only spans whose content is plain text (no nested tags) are matched.
 * Each matched span with a lookup value counts as one substitution, even
 * if the new text equals the old.
 *
 * @param html - HTML source to process.
 * @param lookup - Stat id → display string.
 * @returns the rewritten HTML and the number of substitutions made.
 */
export function bakeHtml(
  html: string,
  lookup: Record<string, string>,
): { html: string; replaced: number } {
  // Groups: (1) opening tag carrying the data-stat-id attribute, in any
  // attribute position, (2) the stat id, (3) the text-only content,
  // (4) the closing tag. `[^<]*` keeps the match from crossing into
  // nested markup.
  const statSpanPattern =
    /(<span\b[^>]*\bdata-stat-id="(stat-[a-z][a-z0-9-]*)"[^>]*>)([^<]*)(<\/span>)/gi;

  let substitutions = 0;

  const substitute = (
    wholeMatch: string,
    openingTag: string,
    id: string,
    _oldText: string,
    closingTag: string,
  ): string => {
    const baked = lookup[id];
    if (baked !== undefined) {
      substitutions += 1;
      return `${openingTag}${baked}${closingTag}`;
    }
    return wholeMatch;
  };

  const output = html.replace(statSpanPattern, substitute);
  return { html: output, replaced: substitutions };
}
 
/**
 * Inputs for `bakeStatsHtml`. Both paths are required here; the `dist`
 * and `cia-data/extraction_summary_report.csv` fallbacks are applied by
 * the CLI entry point, not by this interface.
 */
interface BakeOptions {
  /** Directory containing the HTML files to bake (the CLI falls back to `dist`). */
  distDir: string;
  /** Path to the extraction-summary CSV (the CLI falls back to `cia-data/extraction_summary_report.csv`). */
  csvPath: string;
  /**
   * Glob-equivalent filter for files inside `distDir`. It is called with
   * each entry name returned by a non-recursive listing of `distDir`;
   * `defaultFilter` is used when omitted.
   */
  filter?: (relPath: string) => boolean;
}
 
/**
 * Default scope: `index.html` plus suffixed variants `index_<code>.html`
 * where `<code>` is two or three ASCII letters. Matching is
 * case-insensitive and anchored, so names with a directory part never
 * match.
 */
function defaultFilter(rel: string): boolean {
  const rootIndexPattern = /^index(_[a-z]{2,3})?\.html$/i;
  return rootIndexPattern.test(rel);
}
 
/**
 * Bake stat values from the extraction-summary CSV into the HTML files
 * located directly inside `opts.distDir` (the listing is not recursive).
 *
 * A file is rewritten on disk only when at least one substitution was
 * made and the resulting text differs from what was read. Every regular
 * file accepted by the filter is reported, including those with zero
 * substitutions.
 *
 * @param opts - Paths and optional file filter; see `BakeOptions`.
 * @returns Per-file substitution counts and their sum.
 * @throws Error when the CSV does not exist or yields no stat values.
 */
export function bakeStatsHtml(opts: BakeOptions): {
  files: { path: string; replaced: number }[];
  totalReplaced: number;
} {
  const csvMissing = !fs.existsSync(opts.csvPath);
  if (csvMissing) {
    throw new Error(`bake-stats-html: CSV not found at ${opts.csvPath}`);
  }

  const lookup = buildStatLookup(fs.readFileSync(opts.csvPath, 'utf8'));
  const statCount = Object.keys(lookup).length;
  if (statCount === 0) {
    throw new Error(
      `bake-stats-html: 0 stat values built from ${opts.csvPath} — refusing to deploy with empty placeholders`,
    );
  }

  const accepts = opts.filter ?? defaultFilter;
  const files: { path: string; replaced: number }[] = [];
  let totalReplaced = 0;

  fs.readdirSync(opts.distDir).forEach((entry) => {
    if (!accepts(entry)) return;

    const target = path.join(opts.distDir, entry);
    // The filter only sees names; make sure the entry is a regular file
    // before reading it.
    if (!fs.statSync(target).isFile()) return;

    const original = fs.readFileSync(target, 'utf8');
    const baked = bakeHtml(original, lookup);
    const changed = baked.replaced > 0 && baked.html !== original;
    if (changed) {
      fs.writeFileSync(target, baked.html);
    }

    files.push({ path: entry, replaced: baked.replaced });
    totalReplaced += baked.replaced;
  });

  return { files, totalReplaced };
}
 
// ─── CLI ─────────────────────────────────────────────────────────────────────
// Usage: <script> [distDir] [csvPath]
//   distDir defaults to `dist`, csvPath to `cia-data/extraction_summary_report.csv`.
// Exits with status 1 when the dist directory is missing or when no
// substitution was made. Errors thrown by `bakeStatsHtml` are not caught here.

// True only when this module is the script Node was started with, so that
// importing it (e.g. from tests) does not run the CLI.
const isMain =
  Boolean(process.argv[1]) &&
  path.resolve(process.argv[1]) === path.resolve(__filename);
if (isMain) {
  // `||` (not `??`) on purpose: an empty-string argument also falls back.
  const distDir = process.argv[2] || 'dist';
  const csvPath =
    process.argv[3] || path.join('cia-data', 'extraction_summary_report.csv');
  if (!fs.existsSync(distDir)) {
    console.error(`bake-stats-html: dist directory not found at ${distDir}`);
    process.exit(1);
  }
  const { files, totalReplaced } = bakeStatsHtml({ distDir, csvPath });
  for (const f of files) {
    console.log(`  ${f.path}: replaced ${f.replaced} stat span(s)`);
  }
  console.log(
    `✓ bake-stats-html: ${totalReplaced} substitution(s) across ${files.length} file(s) (${distDir})`,
  );
  // Zero substitutions means the pages would still depend on the runtime
  // fetch, which this build step exists to remove — treat it as a failure.
  if (totalReplaced === 0) {
    console.error(
      `bake-stats-html: no substitutions made — placeholders still rely on runtime fetch. Failing build.`,
    );
    process.exit(1);
  }
}