/**
* @module scripts/statskontoret/parsers/xlsx
* @description Defensive XLSX workbook parser for Statskontoret workbooks.
*
* Operates directly on the OOXML zip envelope so the client doesn't depend
* on a full SpreadsheetML library — only `jszip` is required. Isolating
* the parser makes it easy to add fuzz tests against malformed workbooks.
*
* @author Hack23 AB
* @license Apache-2.0
*/
import JSZip from 'jszip';
import { StatskontoretError } from '../errors.js';
import type { StatskontoretSheet, StatskontoretWorkbook } from '../types.js';
import {
decodeXml,
extractTextNodes,
firstXmlTagValue,
parseXmlAttributes,
} from '../internal/text.js';
/**
 * Parses an XLSX workbook into its sheets and rows of cell text.
 *
 * Reads `xl/workbook.xml`, `xl/_rels/workbook.xml.rels` and (when present)
 * `xl/sharedStrings.xml` from the zip envelope, then loads each worksheet
 * part referenced by the workbook through its relationship id.
 *
 * @param input - Raw bytes of the `.xlsx` file.
 * @returns The workbook with one entry per sheet whose relationship resolves,
 *   in the order the sheets are declared in `xl/workbook.xml`.
 * @throws StatskontoretError when a required part (workbook, relationships or
 *   a referenced worksheet) is missing from the archive. Rejections from
 *   `JSZip.loadAsync` are not caught here and propagate to the caller.
 */
export async function parseStatskontoretXlsx(
  input: ArrayBuffer | Uint8Array,
): Promise<StatskontoretWorkbook> {
  const zip = await JSZip.loadAsync(input);
  const workbookXml = await readZipText(zip, 'xl/workbook.xml');
  const workbookRelsXml = await readZipText(zip, 'xl/_rels/workbook.xml.rels');
  // The shared-strings part is optional; a workbook without it simply has no
  // shared strings to resolve.
  const sharedStringsXml = zip.file('xl/sharedStrings.xml')
    ? await readZipText(zip, 'xl/sharedStrings.xml')
    : '';
  const sharedStrings = parseSharedStrings(sharedStringsXml);
  const rels = parseWorkbookRelationships(workbookRelsXml);

  const sheets: StatskontoretSheet[] = [];
  for (const sheet of parseWorkbookSheets(workbookXml)) {
    const target = rels.get(sheet.relationshipId);
    // A sheet whose relationship id has no target cannot be located: skip it.
    if (!target) continue;
    // Targets starting with '/' are package-absolute; anything else is
    // resolved against the `xl/` directory that holds workbook.xml.
    const sheetPath = target.startsWith('/') ? target.slice(1) : `xl/${target}`;
    // Collapse "/./" segments so the path matches the zip entry name.
    const sheetXml = await readZipText(zip, sheetPath.replace(/\/\.\//g, '/'));
    sheets.push({ name: sheet.name, rows: parseWorksheetRows(sheetXml, sharedStrings) });
  }
  return { sheets };
}
/**
 * Extracts the sheet declarations from `xl/workbook.xml`.
 *
 * Only self-closing `<sheet .../>` elements are recognised. Entries missing
 * either a name or a relationship id are dropped.
 *
 * @param xml - Contents of `xl/workbook.xml`.
 * @returns Sheet names (XML-decoded) paired with their relationship ids, in
 *   document order.
 */
function parseWorkbookSheets(xml: string): Array<{ name: string; relationshipId: string }> {
  const sheets: Array<{ name: string; relationshipId: string }> = [];
  const sheetRe = /<sheet\b([^>]*)\/>/gi;
  for (const match of xml.matchAll(sheetRe)) {
    const attrs = parseXmlAttributes(match[1] ?? '');
    const name = attrs.get('name');
    // Prefer the namespaced attribute, falling back to a bare `id`.
    const relationshipId = attrs.get('r:id') ?? attrs.get('id');
    if (name && relationshipId) sheets.push({ name: decodeXml(name), relationshipId });
  }
  return sheets;
}
/**
 * Builds a lookup from relationship id to target path out of
 * `xl/_rels/workbook.xml.rels`.
 *
 * Only self-closing `<Relationship .../>` elements are recognised; entries
 * without both `Id` and `Target` are ignored. A later entry with the same id
 * overwrites an earlier one.
 *
 * @param xml - Contents of the workbook relationships part.
 * @returns Map keyed by relationship id, holding the raw `Target` value.
 */
function parseWorkbookRelationships(xml: string): Map<string, string> {
  const rels = new Map<string, string>();
  const relRe = /<Relationship\b([^>]*)\/>/gi;
  for (const match of xml.matchAll(relRe)) {
    const attrs = parseXmlAttributes(match[1] ?? '');
    const id = attrs.get('Id');
    const target = attrs.get('Target');
    if (id && target) rels.set(id, target);
  }
  return rels;
}
/**
 * Reads the shared-string table from `xl/sharedStrings.xml`.
 *
 * Each `<si>` element becomes one entry, at the same position it has in the
 * document, so cells of type `s` can index into the result.
 *
 * @param xml - Contents of the shared-strings part, or `''` when the workbook
 *   has none.
 * @returns The text of every shared-string item, in document order.
 */
function parseSharedStrings(xml: string): string[] {
  if (!xml) return [];
  const strings: string[] = [];
  // Accept both `<si>...</si>` and the self-closing `<si/>`. Treating `<si/>`
  // as an opening tag would swallow the following item and shift the index of
  // every later string.
  const siRe = /<si\b([^>]*?)(?:\/>|>([\s\S]*?)<\/si>)/gi;
  for (const match of xml.matchAll(siRe)) {
    // Group 2 is undefined for a self-closing item, which has no content.
    strings.push(extractTextNodes(match[2] ?? ''));
  }
  return strings;
}
/**
 * Converts a worksheet part into a dense grid of cell strings.
 *
 * Cells are placed by the column letters of their `r` reference; a cell
 * without a usable reference is appended after the cells seen so far in the
 * row. Gaps between populated columns are filled with `''`.
 *
 * @param xml - Contents of a worksheet part (for example
 *   `xl/worksheets/sheet1.xml`).
 * @param sharedStrings - Shared-string table used to resolve cells of type `s`.
 * @returns One array per `<row>` element, in document order.
 */
function parseWorksheetRows(xml: string, sharedStrings: readonly string[]): string[][] {
  // Both patterns accept the self-closing form (`<row .../>`, `<c .../>`).
  // Styled-but-empty cells are written self-closing; treating one as an
  // opening tag would consume the next cell and put its value in the wrong
  // column. `matchAll` works on a clone of the regex, so sharing these across
  // iterations is safe.
  const rowRe = /<row\b([^>]*?)(?:\/>|>([\s\S]*?)<\/row>)/gi;
  const cellRe = /<c\b([^>]*?)(?:\/>|>([\s\S]*?)<\/c>)/gi;

  const rows: string[][] = [];
  for (const rowMatch of xml.matchAll(rowRe)) {
    const row: string[] = [];
    // Group 2 is undefined for a self-closing row, which has no cells.
    for (const cellMatch of (rowMatch[2] ?? '').matchAll(cellRe)) {
      const attrs = parseXmlAttributes(cellMatch[1] ?? '');
      const ref = attrs.get('r') ?? '';
      const cellIndex = cellRefToColumnIndex(ref) ?? row.length;
      row[cellIndex] = parseCellValue(cellMatch[2] ?? '', attrs.get('t'), sharedStrings);
    }
    // Sparse assignment leaves holes; materialise them as empty strings.
    rows.push(Array.from({ length: row.length }, (_, i) => row[i] ?? ''));
  }
  return rows;
}
/**
 * Resolves the text of a single cell.
 *
 * @param xml - Inner XML of the `<c>` element.
 * @param type - Value of the cell's `t` attribute, if any.
 * @param sharedStrings - Shared-string table used when `type` is `s`.
 * @returns The text nodes of the cell for `inlineStr`; the shared string at
 *   the index held in `<v>` for `s` (or `''` when that index does not
 *   resolve); otherwise the XML-decoded `<v>` content. A cell without `<v>`
 *   yields `''`.
 */
function parseCellValue(
  xml: string,
  type: string | undefined,
  sharedStrings: readonly string[],
): string {
  // Inline strings carry their text directly in the cell, not in <v>.
  if (type === 'inlineStr') return extractTextNodes(xml);
  const value = firstXmlTagValue(xml, 'v');
  if (value === undefined) return '';
  // A non-numeric or out-of-range index falls through to '' via `??`.
  if (type === 's') return sharedStrings[Number.parseInt(value, 10)] ?? '';
  return decodeXml(value);
}
/**
 * Converts the column letters of a cell reference to a zero-based index.
 *
 * Letters are read case-insensitively as a bijective base-26 number, so
 * `A` is 0, `Z` is 25 and `AA` is 26. Anything after the leading letters
 * (the row number) is ignored.
 *
 * @param ref - Cell reference such as `B7`.
 * @returns The zero-based column index, or `undefined` when `ref` does not
 *   start with an ASCII letter.
 */
function cellRefToColumnIndex(ref: string): number | undefined {
  const letters = ref.match(/^[A-Z]+/i)?.[0];
  if (!letters) return undefined;
  let index = 0;
  for (const char of letters.toUpperCase()) {
    // 'A' is char code 65; each letter contributes a digit in the range 1..26.
    index = index * 26 + (char.charCodeAt(0) - 65 + 1);
  }
  // Shift from the 1-based column number to a 0-based array index.
  return index - 1;
}
/**
 * Reads one entry of the archive as a string.
 *
 * @param zip - The opened workbook archive.
 * @param path - Entry name inside the archive, e.g. `xl/workbook.xml`.
 * @returns The entry's content as returned by `file.async('string')`.
 * @throws StatskontoretError (kind `'workbook'`) when the entry does not exist.
 */
async function readZipText(zip: JSZip, path: string): Promise<string> {
  const file = zip.file(path);
  if (!file) throw new StatskontoretError(`Statskontoret workbook missing ${path}`, 'workbook');
  return file.async('string');
}
|