/**
* @module scripts/statskontoret/client
* @description Thin HTTP transport for Statskontoret open-data pages.
*
* Discovers downloadable Excel/CSV-ZIP links and fetches workbooks/archives
* through an allowlisted fetch guard. Parsing belongs in the `parsers/`
* and `domain/` modules — this class is purely transport.
*
* @author Hack23 AB
* @license Apache-2.0
*/
import { StatskontoretError } from './errors.js';
import { extractStatskontoretDownloadLinks } from './extractors/download-links.js';
import { parseStatskontoretCsvZip } from './parsers/csv-zip.js';
import { parseStatskontoretXlsx } from './parsers/xlsx.js';
import {
STATSKONTORET_BASE_URL,
getStatskontoretSource,
} from './source-registry.js';
import { trimTrailingSlash } from './internal/text.js';
import {
assertStatskontoretFetchTarget,
resolveStatskontoretUrl,
} from './internal/url-guard.js';
import type {
StatskontoretClientConfig,
StatskontoretDownloadLink,
StatskontoretSourceKey,
StatskontoretWorkbook,
} from './types.js';
/** Request timeout in milliseconds used when the client config does not supply `timeout`. */
const DEFAULT_TIMEOUT = 15 * 1_000;
/**
 * HTTP transport client for Statskontoret open-data pages.
 *
 * Every request URL is resolved against {@link StatskontoretClient.baseURL}
 * and checked with `assertStatskontoretFetchTarget` before the fetch function
 * is invoked. Network failures and non-OK responses are reported as
 * `StatskontoretError` with code `'http'`.
 */
export class StatskontoretClient {
  /** Base URL that request targets are resolved against (trailing slash trimmed). */
  readonly baseURL: string;

  /** Per-request timeout in milliseconds, covering both the request and the body read. */
  readonly timeout: number;

  /** Fetch implementation used for all requests (injectable through the config). */
  private readonly fetchFn: typeof fetch;

  /**
   * @param config - Optional overrides for `baseURL`, `timeout` and `fetchFn`.
   *   Omitted fields fall back to `STATSKONTORET_BASE_URL`, `DEFAULT_TIMEOUT`
   *   and the global `fetch` respectively.
   */
  constructor(config: StatskontoretClientConfig = {}) {
    this.baseURL = trimTrailingSlash(config.baseURL ?? STATSKONTORET_BASE_URL);
    this.timeout = config.timeout ?? DEFAULT_TIMEOUT;
    this.fetchFn = config.fetchFn ?? fetch;
  }

  /**
   * Fetches the page registered for `sourceKey` and extracts its download links.
   *
   * @param sourceKey - Key of the source whose page should be scanned.
   * @returns The links returned by `extractStatskontoretDownloadLinks` for that page.
   * @throws StatskontoretError when the page cannot be fetched.
   */
  async discoverDownloads(sourceKey: StatskontoretSourceKey): Promise<StatskontoretDownloadLink[]> {
    const source = getStatskontoretSource(sourceKey);
    const pageUrl = resolveStatskontoretUrl(source.url, this.baseURL);
    const html = await this.fetchText(pageUrl);
    return extractStatskontoretDownloadLinks(html, sourceKey, pageUrl, this.baseURL);
  }

  /**
   * Downloads `url` and hands the bytes to `parseStatskontoretXlsx`.
   *
   * @param url - Workbook location; resolved against the base URL.
   * @throws StatskontoretError when the download fails.
   */
  async fetchWorkbook(url: string): Promise<StatskontoretWorkbook> {
    const buffer = await this.fetchArrayBuffer(url);
    return parseStatskontoretXlsx(buffer);
  }

  /**
   * Downloads `url` and hands the bytes to `parseStatskontoretCsvZip`.
   *
   * @param url - Archive location; resolved against the base URL.
   * @returns The record produced by `parseStatskontoretCsvZip`.
   * @throws StatskontoretError when the download fails.
   */
  async fetchCsvZip(url: string): Promise<Record<string, string>> {
    const buffer = await this.fetchArrayBuffer(url);
    return parseStatskontoretCsvZip(buffer);
  }

  /**
   * Fetches `url` and returns the response body as text.
   *
   * @throws StatskontoretError on network failure, timeout, non-OK status,
   *   or a failure while reading the body.
   */
  async fetchText(url: string): Promise<string> {
    return this.fetchWithTimeout(url, (response) => response.text());
  }

  /**
   * Fetches `url` and returns the response body as an `ArrayBuffer`.
   *
   * @throws StatskontoretError on network failure, timeout, non-OK status,
   *   or a failure while reading the body.
   */
  async fetchArrayBuffer(url: string): Promise<ArrayBuffer> {
    return this.fetchWithTimeout(url, (response) => response.arrayBuffer());
  }

  /**
   * Performs a guarded request and consumes the body while the timeout is
   * still armed.
   *
   * The abort timer is cleared only after `readBody` settles. Clearing it as
   * soon as the response headers arrive would leave the body download
   * unbounded, so a stalled transfer could hang forever.
   *
   * @param url - Target URL; resolved against the base URL and validated
   *   before any network activity.
   * @param readBody - Consumes the response body (e.g. `text()` or
   *   `arrayBuffer()`); only called for OK responses.
   * @returns Whatever `readBody` resolves to.
   * @throws StatskontoretError with code `'http'` when the request rejects,
   *   the response is not OK, or reading the body fails.
   */
  private async fetchWithTimeout<T>(
    url: string,
    readBody: (response: Response) => Promise<T>,
  ): Promise<T> {
    const resolved = resolveStatskontoretUrl(url, this.baseURL);
    assertStatskontoretFetchTarget(resolved, this.baseURL);

    // Shared wrapper so request and body-read failures are reported identically.
    const toFetchError = (error: unknown): StatskontoretError => {
      const detail = error instanceof Error ? error.message : String(error);
      return new StatskontoretError(
        `Statskontoret fetch failed for ${resolved}: ${detail}`,
        'http',
        { cause: error },
      );
    };

    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), this.timeout);
    try {
      let response: Response;
      try {
        response = await this.fetchFn(resolved, {
          signal: controller.signal,
          headers: {
            Accept:
              'text/html,application/vnd.openxmlformats-officedocument.spreadsheetml.sheet,application/zip,text/csv,*/*',
          },
        });
      } catch (error) {
        throw toFetchError(error);
      }

      if (!response.ok) {
        throw new StatskontoretError(
          `Statskontoret API error: ${response.status} ${response.statusText} for ${response.url}`,
          'http',
        );
      }

      try {
        // Awaited inside the outer try so the timer stays armed during the read.
        return await readBody(response);
      } catch (error) {
        throw toFetchError(error);
      }
    } finally {
      clearTimeout(timeoutId);
    }
  }
}
|