All files / scripts/statskontoret/internal url-guard.ts

100% Statements 10/10
100% Branches 5/5
100% Functions 2/2
100% Lines 10/10

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50                                    17x                           11x 11x   1x   10x 1x   9x 9x 2x         7x    
/**
 * @module scripts/statskontoret/internal/url-guard
 * @description Allowlist guard + URL resolver for Statskontoret outbound fetches.
 *
 * Shared by the HTTP client and the link extractor — both need to resolve
 * relative HREFs against the same base URL and reject hosts outside the
 * Statskontoret allowlist per the firewall contract documented in
 * `analysis/statskontoret/indicators-inventory.json`.
 *
 * @author Hack23 AB
 * @license Apache-2.0
 */
 
import { StatskontoretError } from '../errors.js';
import { STATSKONTORET_BASE_URL } from '../source-registry.js';
import { decodeHtml, trimTrailingSlash } from './text.js';
 
/**
 * Resolve a link (relative or absolute) against a base URL and return the
 * serialised absolute form.
 *
 * The link is first passed through `decodeHtml` (presumably to undo HTML
 * entity escaping such as `&amp;` in scraped HREFs — confirm in `./text.js`).
 * The base is passed through `trimTrailingSlash` and then has a single `/`
 * appended before being handed to the WHATWG `URL` constructor.
 *
 * Note that an absolute `url` replaces the base entirely; this function does
 * no host validation of its own.
 *
 * @param url - Link text to resolve.
 * @param baseURL - Base URL the link is resolved against.
 * @returns The resolved URL as a string.
 * @throws TypeError when the `URL` constructor cannot parse the inputs.
 */
export function resolveStatskontoretUrl(url: string, baseURL: string): string {
  const decodedLink = decodeHtml(url);
  const slashTerminatedBase = `${trimTrailingSlash(baseURL)}/`;
  const absolute = new URL(decodedLink, slashTerminatedBase);
  return absolute.toString();
}
 
/**
 * Guard an outbound fetch: the target must be an absolute `https:` URL whose
 * hostname equals the hostname of `baseURL`.
 *
 * Mirrors the firewall allowlist documented in
 * `analysis/statskontoret/indicators-inventory.json`, so that absolute URLs
 * supplied by untrusted callers cannot steer the client to arbitrary hosts.
 *
 * Checks run in this order: parse, scheme, hostname. Only the hostname is
 * compared — port, path, query and credentials are not inspected.
 *
 * @param url - Absolute URL about to be fetched.
 * @param baseURL - URL whose hostname is the single allowed host; defaults to
 *   `STATSKONTORET_BASE_URL`.
 * @returns The parsed `URL` for the validated target.
 * @throws StatskontoretError (kind `'http'`) when `url` cannot be parsed, is
 *   not `https:`, or targets a different hostname.
 * @throws TypeError when `baseURL` itself cannot be parsed by `URL`.
 */
export function assertStatskontoretFetchTarget(
  url: string,
  baseURL: string = STATSKONTORET_BASE_URL,
): URL {
  // Parse first; a failed parse is reported with the domain error type.
  let target: URL | undefined;
  try {
    target = new URL(url);
  } catch {
    target = undefined;
  }
  if (target === undefined) {
    throw new StatskontoretError(`Invalid Statskontoret URL: ${url}`, 'http');
  }

  // Scheme is checked before the base URL is even parsed.
  const isHttps = target.protocol === 'https:';
  if (!isHttps) {
    throw new StatskontoretError(`Statskontoret fetch must use https: ${url}`, 'http');
  }

  const { hostname: permittedHost } = new URL(baseURL);
  const { hostname: requestedHost } = target;
  if (requestedHost === permittedHost) {
    return target;
  }

  throw new StatskontoretError(
    `Statskontoret fetch host ${requestedHost} not in allowlist (${permittedHost})`,
    'http',
  );
}