All files / scripts fix-hreflang.ts

0% Statements 0/78
0% Branches 0/32
0% Functions 0/8
0% Lines 0/71

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161                                                                                                                                                                                                                                                                                                                                 
#!/usr/bin/env npx tsx
/**
 * Fix hreflang links in ALL HTML files (news, dashboards, static pages)
 *
 * Repairs issues found in HTML files generated by older agentic workflows
 * or build scripts before the `renderHreflangBlock` helper was standardised:
 *
 * 1. **Relative hreflang URLs** → converts to absolute URLs with
 *    `https://riksdagsmonitor.com/…` prefix (auto-detects directory)
 * 2. **Missing `x-default`** → adds `x-default` pointing at the English
 *    version (or at the first listed alternate if no `en` link exists);
 *    only applied to files that already have at least two hreflang links
 * 3. **Non-standard hreflang codes** → normalises `zh-CN` → `zh` and
 *    `no` → `nb` (BCP-47 Bokmål)
 *
 * Scans: root `*.html`, `news/`, `dashboard/`, `dashboards/` and any
 * subdirectories. Skips: `node_modules`, `dist`, `docs`, `builds`,
 * `coverage`, `.git`, `scripts/build-dashboard-pages.snapshot`.
 *
 * Safe to run repeatedly — already-correct files are left untouched.
 *
 * Usage:
 *   npx tsx scripts/fix-hreflang.ts          # dry-run (default)
 *   npx tsx scripts/fix-hreflang.ts --write   # apply fixes
 *
 * @author Hack23 AB (Quality Engineering)
 * @license Apache-2.0
 */
 
import * as fs from 'node:fs';
import * as path from 'node:path';
 
/** Parent of the directory containing this script; `main` scans for HTML files from here. */
const ROOT = path.resolve(import.meta.dirname, '..');
/** Site origin used as the base when turning relative hreflang hrefs into absolute URLs. */
const BASE_URL = 'https://riksdagsmonitor.com';
/** True when `--write` is present on the command line; otherwise the script only reports (dry-run). */
const WRITE = process.argv.includes('--write');
 
/**
 * Directories to skip when scanning for HTML files.
 *
 * Matching is done on the bare entry name at every depth of the walk, so an
 * entry with one of these names is skipped wherever it appears in the tree.
 */
const SKIP_DIRS = new Set([
  'node_modules', 'dist', 'docs', 'builds', 'coverage', '.git',
  'build-dashboard-pages.snapshot',
]);
 
/**
 * Walk `dir` depth-first and gather the path of every `.html` file below it.
 *
 * Entries whose name is listed in `SKIP_DIRS` are ignored entirely, so
 * excluded directories are never descended into. Paths are returned in the
 * order the directory entries are reported, with a sub-directory's files
 * appearing at the position of that sub-directory.
 *
 * @param dir - Directory to scan.
 * @returns Paths (each built by joining onto `dir`) of all HTML files found.
 */
function collectHtmlFiles(dir: string): string[] {
  const entries = fs.readdirSync(dir, { withFileTypes: true });
  return entries
    .filter((dirent) => !SKIP_DIRS.has(dirent.name))
    .flatMap((dirent) => {
      const entryPath = path.join(dir, dirent.name);
      if (dirent.isDirectory()) {
        return collectHtmlFiles(entryPath);
      }
      return dirent.name.endsWith('.html') ? [entryPath] : [];
    });
}
 
/** Summary of the hreflang repairs made (or, in dry-run mode, that would be made) to one HTML file. */
interface FixResult {
  /** Path of the file relative to `ROOT`. */
  file: string;
  /** Number of relative hreflang `href` values rewritten to absolute URLs. */
  fixedRelative: number;
  /** Whether a missing `x-default` alternate link was added. */
  addedXDefault: boolean;
}
 
/**
 * Matches an hreflang `<link>` whose href is not an absolute http(s) URL.
 * Groups: 1 = everything up to and including `href="`, 2 = href, 3 = closing quote.
 */
const RELATIVE_HREFLANG_RE =
  /(<link\s+rel="alternate"\s+hreflang="[^"]+"\s+href=")(?!https?:\/\/)([^"]+)(")/g;

/** Matches an hreflang `<link>` carrying one of the non-standard codes we normalise. */
const NON_STANDARD_CODE_RE = /(<link\s+rel="alternate"\s+hreflang=")(zh-CN|no)(")/g;

/** Matches any hreflang `<link>` up to the end of its href. Groups: 1 = code, 2 = href. */
const HREFLANG_LINK_RE = /<link\s+rel="alternate"\s+hreflang="([^"]+)"\s+href="([^"]+)"/g;

/** Non-standard hreflang code → normalised replacement (`no` → `nb` is BCP-47 Bokmål). */
const HREFLANG_CODE_FIXES: Readonly<Record<string, string>> = {
  'zh-CN': 'zh',
  no: 'nb',
};

/** Pure transformation: apply all hreflang repairs to `html`, resolving relative hrefs against `baseUrl`. */
function repairHreflangMarkup(
  html: string,
  baseUrl: string,
): { html: string; fixedRelative: number; addedXDefault: boolean } {
  let fixedRelative = 0;

  // 1. Relative hreflang hrefs → absolute. Resolving through the URL parser
  //    (instead of string concatenation) handles `./x`, `../x`, `/x` and
  //    protocol-relative `//host/x` hrefs correctly.
  let repaired = html.replace(
    RELATIVE_HREFLANG_RE,
    (match: string, before: string, href: string, after: string) => {
      let absolute: string;
      try {
        absolute = new URL(href, baseUrl).href;
      } catch {
        // Unparseable href: leave the tag exactly as it was.
        return match;
      }
      fixedRelative++;
      return `${before}${absolute}${after}`;
    },
  );

  // 2. Normalise non-standard hreflang codes (zh-CN → zh, no → nb).
  repaired = repaired.replace(
    NON_STANDARD_CODE_RE,
    (_match: string, before: string, code: string, after: string) =>
      `${before}${HREFLANG_CODE_FIXES[code] ?? code}${after}`,
  );

  // 3. Add x-default if missing (only for files that have ≥2 hreflang entries).
  let addedXDefault = false;
  const links = [...repaired.matchAll(HREFLANG_LINK_RE)];
  if (links.length >= 2 && !links.some((m) => m[1] === 'x-default')) {
    // Prefer the English alternate; otherwise fall back to the first one listed.
    const target = links.find((m) => m[1] === 'en') ?? links[0];
    const last = links[links.length - 1];
    if (target && last) {
      // Locate the real end of the last tag by position. This works for `>`,
      // ` />` and tags with trailing attributes, always targets the actual
      // last tag (not an earlier textual duplicate), and avoids `$`-pattern
      // expansion that a string `replace` would apply to the href.
      const closeIdx = repaired.indexOf('>', (last.index ?? 0) + last[0].length);
      if (closeIdx !== -1) {
        const insertAt = closeIdx + 1;
        const xDefaultTag = `\n  <link rel="alternate" hreflang="x-default" href="${target[2]}">`;
        repaired = repaired.slice(0, insertAt) + xDefaultTag + repaired.slice(insertAt);
        addedXDefault = true;
      }
    }
  }

  return { html: repaired, fixedRelative, addedXDefault };
}

/**
 * Repair the hreflang `<link rel="alternate">` tags of a single HTML file.
 *
 * Three repairs are applied, in order:
 * 1. hrefs that are not absolute http(s) URLs are resolved against the
 *    file's directory on `BASE_URL`;
 * 2. `zh-CN` is rewritten to `zh` and `no` to `nb`;
 * 3. if the file has at least two hreflang links and none is `x-default`,
 *    an `x-default` link is inserted after the last hreflang link, pointing
 *    at the `en` alternate (or the first alternate when there is no `en`).
 *
 * Only tags written as `<link rel="alternate" hreflang="…" href="…"` (that
 * attribute order, double quotes) are recognised.
 *
 * The file is rewritten only when `WRITE` is set and the content actually
 * changed, so running the script repeatedly is a no-op for repaired files.
 *
 * @param filePath - Path of the HTML file to inspect.
 * @returns A summary of the repairs, or `null` when the file needs no change.
 */
function fixHreflangInFile(filePath: string): FixResult | null {
  const relPath = path.relative(ROOT, filePath);
  const original = fs.readFileSync(filePath, 'utf8');

  // Directory of the file within the site, with URL separators. For files
  // directly under ROOT `path.dirname` yields ".", which must not leak into
  // the URL as "/./".
  const dirInSite = path.dirname(relPath).replace(/\\/g, '/');
  const baseUrl = dirInSite === '.' ? `${BASE_URL}/` : `${BASE_URL}/${dirInSite}/`;

  const { html, fixedRelative, addedXDefault } = repairHreflangMarkup(original, baseUrl);

  // Compare content rather than tracking flags, so a repair that could not be
  // applied is never reported (or written) as a fix.
  if (html === original) return null;

  if (WRITE) {
    fs.writeFileSync(filePath, html, 'utf8');
  }

  return { file: relPath, fixedRelative, addedXDefault };
}
 
/**
 * Script entry point: scan every HTML file under `ROOT`, run the hreflang
 * repair on each one in turn, and print a summary of what was (or, in
 * dry-run mode, would be) fixed.
 */
function main(): void {
  const htmlFiles = collectHtmlFiles(ROOT);
  const modeLabel = WRITE ? 'WRITE (applying fixes)' : 'DRY-RUN (use --write to apply)';

  console.log(`Scanning ${htmlFiles.length} HTML files across all directories…`);
  console.log(`Mode: ${modeLabel}`);

  // Files that need no change yield null and are dropped from the report.
  const fixes = htmlFiles
    .map((file) => fixHreflangInFile(file))
    .filter((outcome): outcome is FixResult => outcome !== null);

  if (fixes.length === 0) {
    console.log('\n✅ All HTML files have correct hreflang links.');
    return;
  }

  let relativeCount = 0;
  let xDefaultCount = 0;
  for (const fix of fixes) {
    relativeCount += fix.fixedRelative;
    if (fix.addedXDefault) {
      xDefaultCount += 1;
    }
  }

  const heading = WRITE ? '✅ Fixed' : '⚠️  Would fix';
  console.log(`\n${heading} ${fixes.length} file(s):`);
  console.log(`  • ${relativeCount} relative hreflang URLs → absolute`);
  console.log(`  • ${xDefaultCount} missing x-default entries added`);

  if (WRITE) return;
  console.log('\nRun with --write to apply fixes.');
}
 
// Run the scan as soon as the module is loaded (the script is executed directly via tsx).
main();