All files / scripts vite-plugin-static-pages.js

96.05% Statements 73/76
78.37% Branches 29/37
100% Functions 9/9
97.1% Lines 67/69

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281                                                                                                                                                                                                      1x                         8x 8x   8x 8x 8x       2x 2x 2x   1x 2x 1x               1x               7x                       7x 7x 7x 7x 6x 6x 4x 4x   2x 2x 2x 7x 7x     2x 1x 2x                     7x                           10x 3x 3x                           9x   9x                 8x 8x 8x 8x 8x   8x 8x 8x   8x 7x 7x 7x   7x 11x 11x 11x   11x 11x 11x     10x 10x 10x             11x 11x 11x     7x 7x 7x     7x 7x   7x                
/**
 * vite-plugin-static-pages
 *
 * Emits the (very large) set of pre-rendered "static" HTML pages —
 * news articles, news/index_*.html, sitemap_*.html, and
 * political-intelligence_*.html — into `dist/` *outside* Rollup's
 * module graph.
 *
 * ## Why this exists
 *
 * These pages are produced by the prebuild scripts (render-articles,
 * generate-news-indexes, generate-sitemap-html, generate-political-
 * intelligence). They are **pure static HTML**: they reference
 * `styles.css` via a relative path (`styles.css` from the project
 * root, `../styles.css` from the `news/` subfolder), inline a tiny
 * theme-init `<script>`, and embed JSON-LD — but they have **no
 * `<script type="module">`**, no Vite-resolvable imports, and no
 * other bundle-able assets.
 *
 * Listing them as `rollupOptions.input` previously forced Rollup to
 * add ~3 540 entries (3 497 news articles + 14 PI pages + 14
 * sitemaps + 14 news index pages) into the module graph just to
 * rewrite a single `styles.css` `<link>` tag in each. At ~4 250
 * modules the `rendering chunks` phase exhausted Node's default
 * ~4 GB heap, causing
 *   `FATAL ERROR: Ineffective mark-compacts near heap limit
 *    Allocation failed - JavaScript heap out of memory`
 * (release run 25133177267, build log line "rendering chunks…").
 *
 * Bumping `--max-old-space-size` to 8 GB (PR #2117) only delayed
 * the failure — every additional day of news content brings the
 * limit back. The root cause is that Rollup is the wrong tool for
 * static page emission.
 *
 * ## What this plugin does
 *
 * Runs in `closeBundle` (after Vite/Rollup have finished writing
 * the real bundled outputs):
 *
 * 1. Reads `dist/.vite/manifest.json` to find the hashed bundle
 *    name for `styles.css` (e.g. `assets/styles-AbCdEf12.css`).
 * 2. Computes the SHA-384 SRI hash of that bundled stylesheet so
 *    we can attach `integrity="sha384-…" crossorigin="anonymous"`
 *    to the rewritten `<link rel="stylesheet">` — preserving the
 *    behaviour of `vite-plugin-sri-gen` for the pages we no longer
 *    route through Vite.
 * 3. Reads each static HTML page from the project root, performs
 *    a single regex rewrite of the `styles.css` `<link>` tag, and
 *    writes the result into the matching `dist/` location. No DOM
 *    parsing, no full corpus held in memory at once.
 *
 * Memory profile: O(largest single HTML file) ≈ 2 MB worst case
 * (political-intelligence_*.html). Time profile: O(n) on the
 * number of static pages, with synchronous fs writes pipelined by
 * the OS page cache. Both dimensions are dwarfed by Vite's normal
 * cost.
 *
 * ## Trust boundary
 *
 * The pages this plugin emits are entirely produced by the
 * repository's own prebuild scripts (no third-party templates) and
 * served from the same S3 bucket / CloudFront distribution as the
 * bundled CSS. The "trust S3 / CloudFront" classification in
 * `vite.config.js` (SRI dropped for first-party JS) applies here
 * too: we add SRI to the CSS link as a defence-in-depth measure
 * but no other resources need integrity attributes.
 *
 * @author Hack23 AB
 * @license Apache-2.0
 */
 
import crypto from 'node:crypto';
import fs from 'node:fs';
import path from 'node:path';
 
/**
 * @typedef {Object} StaticPageSource
 * @property {string} path  Path relative to the project root.
 * @property {boolean} [recurse]  When `path` is a directory, also
 *                                walk subdirectories. Defaults to false.
 */
 
/**
 * @typedef {Object} StaticPageSet
 * @property {string} label  Human-readable label for the build summary.
 * @property {ReadonlyArray<StaticPageSource>} sources
 */
 
/**
 * Match a single `<link rel="stylesheet" … href="…styles.css">`
 * tag and capture the surrounding attributes so we can splice in
 * `integrity` / `crossorigin` without disturbing the rest.
 *
 * The literal `styles.css` lives in three accepted forms:
 *   - `styles.css`        (root-level pages)
 *   - `../styles.css`     (one-level subfolders, e.g. news/)
 *   - `/styles.css`       (absolute, never used today but tolerated)
 *
 * Capture groups, in order:
 *   1. everything between `<link` and the `rel` attribute
 *   2. everything between the `rel` and `href` attributes
 *   3. the original `href` value (one of the three forms above)
 *   4. everything after the `href` attribute up to the closing `>`
 *
 * Limitations that follow directly from the pattern:
 *   - `rel` must appear *before* `href` inside the tag;
 *   - both attribute values must be double-quoted;
 *   - the `rel` value must be exactly `stylesheet`.
 * Tags that do not fit are left untouched by the rewrite.
 *
 * Flags: `g` (rewrite every matching tag in a page) and `i`
 * (case-insensitive tag / attribute names). Because of `g` the
 * regex object carries `lastIndex` state — use it with
 * `String.prototype.replace`, not with bare `.test()` / `.exec()`.
 */
const STYLESHEET_LINK_RE =
  /<link\b([^>]*?)\brel\s*=\s*"stylesheet"([^>]*?)\bhref\s*=\s*"((?:\.\.\/|\/)?styles\.css)"([^>]*)>/gi;
 
/**
 * Read Vite's emitted manifest to map `styles.css` → its hashed
 * output path. Falls back to scanning `dist/assets/` for a unique
 * `styles-*.css` if the manifest is absent or has no usable
 * `styles.css` entry (which can happen when CSS is registered only
 * under an HTML entry).
 *
 * @param {string} distDir  Absolute path to the Vite output dir.
 * @returns {string} Hashed asset path relative to `distDir`
 *                   (e.g. `assets/styles-Ab12.css`).
 * @throws {SyntaxError} When the manifest exists but is not valid JSON.
 * @throws {Error} When several `styles-*.css` candidates exist in
 *                 `dist/assets/`, or when no candidate can be found.
 */
function readStylesAssetName(distDir) {
  const manifestPath = path.join(distDir, '.vite', 'manifest.json');
  if (fs.existsSync(manifestPath)) {
    /** @type {Record<string, { file?: string }> | null} */
    const manifest = JSON.parse(fs.readFileSync(manifestPath, 'utf8'));
    // Tolerate a manifest that is not an object (e.g. `null`) and entries
    // whose `file` is not a string: fall through to the directory scan
    // instead of crashing with an unrelated TypeError.
    const entry =
      manifest !== null && typeof manifest === 'object' ? manifest['styles.css'] : undefined;
    const file = entry ? entry.file : undefined;
    if (typeof file === 'string' && file.endsWith('.css')) return file;
  }

  // Fallback — scan dist/assets/ for the unique hashed `styles-*.css`.
  const assetsDir = path.join(distDir, 'assets');
  if (fs.existsSync(assetsDir)) {
    const hits = fs
      .readdirSync(assetsDir)
      .filter((f) => /^styles-[A-Za-z0-9_-]+\.css$/.test(f));
    if (hits.length === 1) return `assets/${hits[0]}`;
    if (hits.length > 1) {
      // Refuse to guess: picking the wrong bundle would attach a
      // mismatching SRI hash to every emitted page.
      throw new Error(
        `[static-pages] Found multiple styles-*.css in dist/assets/ ` +
          `(${hits.join(', ')}). Cannot determine canonical bundle.`,
      );
    }
  }

  throw new Error(
    `[static-pages] Could not resolve hashed styles.css filename. ` +
      `Set build.manifest = true in vite.config.js, or check that ` +
      `the main bundle build emitted a styles-*.css under dist/assets/.`,
  );
}
 
/**
 * Compute the SHA-384 digest of `buffer` and return it base64-encoded,
 * i.e. the value that follows the `sha384-` prefix in an SRI
 * `integrity` attribute.
 *
 * @param {string | Buffer | Uint8Array} buffer  Bytes (or text) to hash.
 * @returns {string} Base64-encoded SHA-384 digest.
 */
function sha384Base64(buffer) {
  const hasher = crypto.createHash('sha384');
  hasher.update(buffer);
  return hasher.digest('base64');
}
 
/**
 * Walk a {@link StaticPageSet} and resolve absolute paths for
 * every HTML file it covers.
 *
 * For each source:
 *   - a path that does not exist is skipped silently;
 *   - a file is included only when its name ends in `.html`;
 *   - a directory contributes its direct `.html` children, and —
 *     when `recurse` is set — the `.html` files of every nested
 *     subdirectory as well.
 *
 * A directory's own files are listed before those of its
 * subdirectories.
 *
 * @param {StaticPageSet} set
 * @param {string} projectRoot  Directory that `source.path` values are joined onto.
 * @returns {string[]} absolute paths
 */
function resolvePageFiles(set, projectRoot) {
  const files = [];
  for (const src of set.sources) {
    const abs = path.join(projectRoot, src.path);
    if (!fs.existsSync(abs)) continue;

    const stat = fs.statSync(abs);
    if (stat.isFile()) {
      if (abs.endsWith('.html')) files.push(abs);
      continue;
    }
    if (!stat.isDirectory()) continue;

    const entries = fs.readdirSync(abs, { withFileTypes: true });
    for (const e of entries) {
      if (e.isFile() && e.name.endsWith('.html')) {
        files.push(path.join(abs, e.name));
      }
    }
    if (!src.recurse) continue;

    for (const e of entries) {
      if (!e.isDirectory()) continue;
      const nested = resolvePageFiles(
        { ...set, sources: [{ path: path.join(src.path, e.name), recurse: true }] },
        projectRoot,
      );
      // Append one by one: `push(...nested)` passes every element as a call
      // argument and can throw RangeError on very large page sets.
      for (const file of nested) files.push(file);
    }
  }
  return files;
}
 
/**
 * Resolve the rewritten `href` for a stylesheet link based on the
 * original relative form. The hashed CSS asset always lives at
 * `<dist>/assets/styles-<hash>.css`, so root-level pages reference
 * `assets/...` and one-level-deep pages reference `../assets/...`.
 * An absolute original (`/styles.css`) stays absolute.
 *
 * @param {string} originalHref  e.g. `styles.css`, `../styles.css`
 * @param {string} hashedAsset   e.g. `assets/styles-Ab12.css`
 * @returns {string} `hashedAsset` carrying the same `../` or `/`
 *                   prefix (if any) as `originalHref`.
 */
function rewrittenHref(originalHref, hashedAsset) {
  if (originalHref.startsWith('../')) return `../${hashedAsset}`;
  if (originalHref.startsWith('/')) return `/${hashedAsset}`;
  return hashedAsset;
}
 
/**
 * Build the Vite plugin that copies the pre-rendered static pages
 * into the output directory once bundling has finished.
 *
 * The returned plugin only applies to `build` and registers a
 * sequential, post-ordered `closeBundle` hook. The hook resolves the
 * hashed stylesheet, derives its `sha384-…` integrity value, then
 * for every page of every page set: reads the page from
 * `projectRoot`, rewrites each matching stylesheet `<link>` (new
 * `href` plus `integrity` / `crossorigin`), and writes the result to
 * the same relative location under the output directory. A one-line
 * summary is logged at the end.
 *
 * @param {{
 *   projectRoot: string,
 *   outDir: string,
 *   pageSets: ReadonlyArray<StaticPageSet>
 * }} options  `outDir` may be absolute or relative to `projectRoot`.
 * @returns {import('vite').Plugin}
 */
export default function staticPagesPlugin(options) {
  const { projectRoot, outDir, pageSets } = options;

  /** Emit every configured page set into the output directory. */
  const emitAllPageSets = () => {
    const distDir = path.isAbsolute(outDir) ? outDir : path.join(projectRoot, outDir);
    const hashedAsset = readStylesAssetName(distDir);
    const stylesheetBytes = fs.readFileSync(path.join(distDir, hashedAsset));
    const integrity = `sha384-${sha384Base64(stylesheetBytes)}`;

    const perSet = [];
    for (const set of pageSets) {
      let emittedCount = 0;
      let rewrittenCount = 0;

      for (const sourcePath of resolvePageFiles(set, projectRoot)) {
        // Mirror the page's project-relative location under dist/.
        const targetPath = path.join(distDir, path.relative(projectRoot, sourcePath));
        fs.mkdirSync(path.dirname(targetPath), { recursive: true });

        const sourceHtml = fs.readFileSync(sourcePath, 'utf8');
        let matched = false;
        const spliceAttributes = (_tag, before, mid, href, after) => {
          matched = true;
          const head = `<link${before}rel="stylesheet"${mid}`;
          const tail =
            `href="${rewrittenHref(href, hashedAsset)}" ` +
            `integrity="${integrity}" crossorigin="anonymous"${after}>`;
          return head + tail;
        };
        const renderedHtml = sourceHtml.replace(STYLESHEET_LINK_RE, spliceAttributes);

        // Pages without a matching link are still copied, unchanged.
        fs.writeFileSync(targetPath, renderedHtml, 'utf8');
        emittedCount += 1;
        if (matched) rewrittenCount += 1;
      }

      perSet.push({ label: set.label, count: emittedCount, rewritten: rewrittenCount });
    }

    const totalEmitted = perSet.reduce((sum, s) => sum + s.count, 0);
    const totalRewritten = perSet.reduce((sum, s) => sum + s.rewritten, 0);
    const summary = perSet.map((s) => `${s.label}=${s.count}/${s.rewritten}`).join(', ');
    console.log(
      `[static-pages] emitted ${totalEmitted} HTML page(s), ` +
        `rewrote styles.css href in ${totalRewritten} (${summary})`,
    );
  };

  return {
    name: 'static-pages-emit',
    apply: 'build',
    enforce: 'post',

    closeBundle: {
      order: 'post',
      sequential: true,
      handler() {
        emitAllPageSets();
      },
    },
  };
}