All files / scripts fetch-statskontoret.ts

84.61% Statements 33/39
94.11% Branches 16/17
88.88% Functions 8/9
84.21% Lines 32/38

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247                                                                                                1x 1x     1x     1x                                                                                         6x       6x       6x 6x 6x   2x                     4x 4x                                                           3x   3x 3x     3x 3x 1x 1x                         2x       2x 2x             2x 1x 1x                     1x 2x                                                             3x 3x 3x 3x               4x    
/**
 * @module scripts/fetch-statskontoret
 * @description Cached fetch module for Statskontoret open data, providing a
 * 30-day TTL cache layer over {@link StatskontoretClient}.
 *
 * This module is intended for use by agentic workflows that need Statskontoret
 * context (authority register, budget outturn) without re-downloading large
 * Excel/ZIP files on every run. It follows the same no-MCP client pattern as
 * `imf-context.ts` and `scb-context.ts`.
 *
 * ### Cache behaviour
 * - Cache root: `analysis/data/statskontoret/<sourceKey>/cache/`
 * - TTL: 30 days (configurable via the `cacheTtlMs` option)
 * - On hit: returns the cached payload with provenance metadata
 * - On miss or stale: invokes `StatskontoretClient.discoverDownloads()` and
 *   persists the result before returning
 * - On fetch error: falls back to the most recent stale cache entry (resilience)
 *
 * ### Security
 * Fetch calls go only to `https://www.statskontoret.se` (enforced by
 * `assertStatskontoretFetchTarget` inside `StatskontoretClient`). No
 * credentials are required; all data is PUBLIC classification.
 *
 * @see analysis/statskontoret/indicators-inventory.json
 * @see scripts/statskontoret-client.ts  (low-level HTTP + parse)
 * @see scripts/statskontoret-fetch.ts   (CLI entry-point)
 * @author Hack23 AB
 * @license Apache-2.0
 */
 
import fs from 'node:fs';
import path from 'node:path';
import { fileURLToPath } from 'node:url';
 
import {
  getStatskontoretSource,
  STATSKONTORET_SOURCES,
  StatskontoretClient,
  StatskontoretError,
  type StatskontoretClientConfig,
  type StatskontoretDownloadLink,
  type StatskontoretSourceKey,
} from './statskontoret-client.js';
 
// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------
 
// ES modules do not provide a built-in `__filename`, so derive this file's
// absolute path from `import.meta.url`.
const __filename = fileURLToPath(import.meta.url);
// Parent of the directory containing this file; used below as the base for
// the default cache root.
const REPO_ROOT = path.resolve(path.dirname(__filename), '..');
 
/**
 * Default 30-day cache TTL in milliseconds (30 days × 24 h × 60 min × 60 s × 1000 ms).
 *
 * Used when the `cacheTtlMs` option is not supplied to
 * `fetchStatskontoretCached` or `isStatskontoretCacheFresh`.
 */
export const CACHE_TTL_MS = 30 * 24 * 60 * 60 * 1000;
 
/**
 * Root directory for cached Statskontoret payloads
 * (`<REPO_ROOT>/analysis/data/statskontoret`).
 *
 * Each source key gets its own `<sourceKey>/cache/downloads.json` file below
 * this directory. Used when the `cacheRoot` option is not supplied.
 */
export const STATSKONTORET_CACHE_ROOT = path.join(
  REPO_ROOT,
  'analysis',
  'data',
  'statskontoret',
);
 
// ---------------------------------------------------------------------------
// Types
// ---------------------------------------------------------------------------
 
/**
 * A cached Statskontoret downloads payload with provenance metadata.
 *
 * Returned by `fetchStatskontoretCached` both when the links are served from
 * the file-system cache and when they were just fetched from the origin.
 */
export interface StatskontoretCachedPayload {
  /** The source key the payload was requested for. */
  readonly sourceKey: StatskontoretSourceKey;
  /** `title` of the source definition returned by `getStatskontoretSource`. */
  readonly sourceTitle: string;
  /** `url` of the source definition returned by `getStatskontoretSource`. */
  readonly sourceUrl: string;
  /** Download links, either read from the cache file or freshly discovered. */
  readonly links: readonly StatskontoretDownloadLink[];
  /** Timestamp of the cache entry; this module always sets it to the same value as `fetchedAt`. */
  readonly cachedAt: string;
  /** ISO-8601 timestamp stamped right after link discovery completed. */
  readonly fetchedAt: string;
  /** `true` when `links` came from the cache file (fresh hit or stale fallback), `false` for a fresh fetch. */
  readonly fromCache: boolean;
  /** Milliseconds between `fetchedAt` and the moment the payload was built; `0` for a fresh fetch. */
  readonly cacheAgeMs: number;
}
 
/**
 * Options for {@link fetchStatskontoretCached}.
 *
 * Every field is optional; an omitted (or `undefined`) field falls back to the
 * default noted on it.
 */
export interface FetchStatskontoretCachedOptions {
  /** Override the 30-day TTL (milliseconds). Mainly for testing. Defaults to `CACHE_TTL_MS`. */
  readonly cacheTtlMs?: number;
  /** Override the cache root directory. Mainly for testing. Defaults to `STATSKONTORET_CACHE_ROOT`. */
  readonly cacheRoot?: string;
  /**
   * Override the `StatskontoretClient` configuration (e.g. inject a mock fetch).
   * Defaults to an empty configuration object.
   */
  readonly clientConfig?: StatskontoretClientConfig;
}
 
/**
 * Internal cache file format.
 *
 * This is the JSON document that `writeCacheEntry` serialises to
 * `downloads.json` and that `readCacheEntry` parses back.
 */
interface CacheEntry {
  /** ISO-8601 timestamp stamped after link discovery completed; drives the TTL check. */
  readonly fetchedAt: string;
  /** Source key the links were discovered for. */
  readonly sourceKey: StatskontoretSourceKey;
  /** Download links as returned by `StatskontoretClient.discoverDownloads()`. */
  readonly links: StatskontoretDownloadLink[];
}
 
// ---------------------------------------------------------------------------
// Private helpers
// ---------------------------------------------------------------------------
 
/**
 * Resolve the cache directory for one source: `<cacheRoot>/<sourceKey>/cache`.
 *
 * @param sourceKey - Source whose cache directory is wanted.
 * @param cacheRoot - Root directory holding all Statskontoret caches.
 * @returns The joined directory path (not created on disk by this call).
 */
function cacheDir(sourceKey: StatskontoretSourceKey, cacheRoot: string): string {
  const segments = [cacheRoot, sourceKey, 'cache'];
  return path.join(...segments);
}
 
/**
 * Resolve the cache file for one source:
 * `<cacheRoot>/<sourceKey>/cache/downloads.json`.
 *
 * @param sourceKey - Source whose cache file is wanted.
 * @param cacheRoot - Root directory holding all Statskontoret caches.
 * @returns The path of the JSON cache file (which may not exist yet).
 */
function cacheFilePath(sourceKey: StatskontoretSourceKey, cacheRoot: string): string {
  const directory = cacheDir(sourceKey, cacheRoot);
  return path.join(directory, 'downloads.json');
}
 
/**
 * Read and validate a cache entry from disk.
 *
 * Any failure to obtain a usable entry — missing file, unreadable file,
 * invalid JSON, or JSON that does not have the expected shape — is reported as
 * `undefined` so callers can treat it uniformly as a cache miss.
 *
 * @param filePath - Path of the JSON cache file.
 * @returns The parsed entry, or `undefined` when no usable entry exists.
 */
function readCacheEntry(filePath: string): CacheEntry | undefined {
  let parsed: unknown;
  try {
    parsed = JSON.parse(fs.readFileSync(filePath, 'utf-8'));
  } catch {
    // Missing/unreadable file or malformed JSON: behave like a cache miss.
    return undefined;
  }

  // `JSON.parse` happily returns `null`, numbers, strings or arrays. Without
  // this guard a file containing `null` would make `cached.fetchedAt` throw in
  // callers, and `{}` would surface a payload whose `links` is `undefined`.
  if (typeof parsed !== 'object' || parsed === null) {
    return undefined;
  }
  const candidate = parsed as { fetchedAt?: unknown; links?: unknown };
  // Only the fields that callers in this module read are required.
  if (typeof candidate.fetchedAt !== 'string' || !Array.isArray(candidate.links)) {
    return undefined;
  }
  return parsed as CacheEntry;
}
 
/**
 * Persist a cache entry as pretty-printed JSON, creating parent directories
 * as needed.
 *
 * The entry is first written to a temporary sibling file and then renamed over
 * the target. A process that dies mid-write therefore leaves the previous
 * cache file intact instead of a truncated one, which matters because the
 * previous entry is what the stale-cache fallback serves.
 *
 * @param filePath - Destination path of the JSON cache file.
 * @param entry    - Entry to serialise.
 * @throws Re-throws any file-system error from writing or renaming.
 */
function writeCacheEntry(filePath: string, entry: CacheEntry): void {
  const dir = path.dirname(filePath);
  fs.mkdirSync(dir, { recursive: true });

  // Random suffix keeps concurrent writers from clobbering each other's
  // temporary file; the file lives next to the target so the rename never
  // crosses a file-system boundary.
  const tempPath = `${filePath}.${Math.random().toString(36).slice(2)}.tmp`;
  try {
    fs.writeFileSync(tempPath, JSON.stringify(entry, null, 2), 'utf-8');
    fs.renameSync(tempPath, filePath);
  } catch (error) {
    // Best-effort cleanup; the original failure is the one worth reporting.
    try {
      fs.rmSync(tempPath, { force: true });
    } catch {
      // Ignore cleanup failures.
    }
    throw error;
  }
}
 
/**
 * Decide whether a cache timestamp is still within its TTL.
 *
 * An entry is fresh only when its age is a finite, non-negative number of
 * milliseconds strictly below `ttlMs`. Unparseable timestamps and timestamps
 * that lie in the future (clock skew, hand-edited or corrupted cache files)
 * are treated as stale; otherwise a future-dated entry would have a negative
 * age and stay "fresh" past its TTL.
 *
 * @param fetchedAt - Timestamp string of the cache entry (ISO-8601 expected).
 * @param ttlMs     - Maximum allowed age in milliseconds.
 * @returns `true` when the entry may be served without re-fetching.
 */
function isCacheFresh(fetchedAt: string, ttlMs: number): boolean {
  const fetchedAtMs = new Date(fetchedAt).getTime();
  if (!Number.isFinite(fetchedAtMs)) {
    return false;
  }
  const ageMs = Date.now() - fetchedAtMs;
  return ageMs >= 0 && ageMs < ttlMs;
}
 
// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------
 
/**
 * Build the payload returned when links are served from a cache entry, whether
 * the entry is still fresh or is being used as a stale fallback.
 *
 * `cacheAgeMs` is measured at the moment this function runs.
 */
function payloadFromCacheEntry(
  sourceKey: StatskontoretSourceKey,
  source: ReturnType<typeof getStatskontoretSource>,
  entry: CacheEntry,
): StatskontoretCachedPayload {
  return {
    sourceKey,
    sourceTitle: source.title,
    sourceUrl: source.url,
    links: entry.links,
    cachedAt: entry.fetchedAt,
    fetchedAt: entry.fetchedAt,
    fromCache: true,
    cacheAgeMs: Date.now() - new Date(entry.fetchedAt).getTime(),
  };
}

/**
 * Fetch Statskontoret download links for a given source key, using a 30-day
 * file-system cache.
 *
 * Resolution order:
 * 1. A fresh cache entry is returned as-is (`fromCache: true`).
 * 2. Otherwise the links are discovered via
 *    `StatskontoretClient.discoverDownloads()` and written to the cache.
 *    Persisting is best-effort: if the cache file cannot be written, a warning
 *    is logged and the freshly fetched links are still returned.
 * 3. If discovery fails and a (stale) cache entry exists, that entry is
 *    returned (`fromCache: true`).
 *
 * @param sourceKey - The Statskontoret source to fetch
 *   (`myndighetsforteckning`, `arsutfall`, `manadsutfall`, `budget-time-series`).
 * @param options   - Optional TTL, cache-root and client overrides.
 * @returns A {@link StatskontoretCachedPayload} with links and provenance info.
 * @throws {StatskontoretError} When discovery fails and no cache entry is
 *   available; the original error is attached as `cause`.
 *
 * @example
 * ```ts
 * const payload = await fetchStatskontoretCached('myndighetsforteckning');
 * console.log(`Found ${payload.links.length} download links (fromCache=${payload.fromCache})`);
 * ```
 */
export async function fetchStatskontoretCached(
  sourceKey: StatskontoretSourceKey,
  options: FetchStatskontoretCachedOptions = {},
): Promise<StatskontoretCachedPayload> {
  const {
    cacheTtlMs = CACHE_TTL_MS,
    cacheRoot = STATSKONTORET_CACHE_ROOT,
    clientConfig = {},
  } = options;

  const source = getStatskontoretSource(sourceKey);
  const filePath = cacheFilePath(sourceKey, cacheRoot);

  // --- Cache hit ---
  const cached = readCacheEntry(filePath);
  if (cached !== undefined && isCacheFresh(cached.fetchedAt, cacheTtlMs)) {
    return payloadFromCacheEntry(sourceKey, source, cached);
  }

  // --- Cache miss or stale: fetch from origin ---
  const client = new StatskontoretClient(clientConfig);
  let links: StatskontoretDownloadLink[];
  try {
    links = await client.discoverDownloads(sourceKey);
  } catch (error) {
    // --- Resilience: return stale cache on fetch failure ---
    if (cached !== undefined) {
      return payloadFromCacheEntry(sourceKey, source, cached);
    }
    const detail = error instanceof Error ? error.message : String(error);
    throw new StatskontoretError(
      `fetch-statskontoret: failed to fetch ${sourceKey} and no cache available: ${detail}`,
      'http',
      { cause: error },
    );
  }

  // Stamp provenance after discovery completes so `fetchedAt` reflects the
  // completion time, not when the request was issued.
  const fetchedAt = new Date().toISOString();

  // Persisting is deliberately outside the fetch `try`: a read-only or full
  // disk must not discard links that were fetched successfully, nor be
  // reported as a fetch failure.
  try {
    writeCacheEntry(filePath, { fetchedAt, sourceKey, links });
  } catch (error) {
    const detail = error instanceof Error ? error.message : String(error);
    console.warn(
      `fetch-statskontoret: fetched ${sourceKey} but could not write cache file ${filePath}: ${detail}`,
    );
  }

  return {
    sourceKey,
    sourceTitle: source.title,
    sourceUrl: source.url,
    links,
    cachedAt: fetchedAt,
    fetchedAt,
    fromCache: false,
    cacheAgeMs: 0,
  };
}
 
/**
 * Report whether the on-disk cache for a source key is still fresh.
 *
 * Only the local cache file is consulted; no network request is made.
 *
 * @param sourceKey  - The Statskontoret source to check.
 * @param options    - Optional TTL and cache-root overrides.
 * @returns `true` if a readable cache entry exists and is within the TTL,
 *   `false` otherwise.
 */
export function isStatskontoretCacheFresh(
  sourceKey: StatskontoretSourceKey,
  options: Pick<FetchStatskontoretCachedOptions, 'cacheTtlMs' | 'cacheRoot'> = {},
): boolean {
  const { cacheRoot = STATSKONTORET_CACHE_ROOT, cacheTtlMs = CACHE_TTL_MS } = options;

  const entry = readCacheEntry(cacheFilePath(sourceKey, cacheRoot));
  if (entry === undefined) {
    // No readable cache file means there is nothing that could be fresh.
    return false;
  }
  return isCacheFresh(entry.fetchedAt, cacheTtlMs);
}
 
/**
 * List the keys of every built-in Statskontoret source, in the order they
 * appear in `STATSKONTORET_SOURCES`.
 *
 * Handy for agentic workflows that want to loop over all sources.
 *
 * @returns A new array of source keys.
 */
export function statskontoretSourceKeys(): readonly StatskontoretSourceKey[] {
  const keys: StatskontoretSourceKey[] = [];
  for (const source of STATSKONTORET_SOURCES) {
    keys.push(source.key);
  }
  return keys;
}