Press n or j to go to the next uncovered block, b, p or k for the previous block.
/**
* @module ai-analysis/helpers
* @description Shared document utility functions for the AI analysis pipeline.
*
* Provides low-level document inspection helpers used across all bounded
* contexts (SWOT, domains, visualisation) to avoid duplication and ensure
* consistent document-type normalisation.
*
* @author Hack23 AB
* @license Apache-2.0
*/
import type { Language } from '../types/language.js';
import type { RawDocument } from '../data-transformers/types.js';
import { extractKeyPassage, cleanMotionText, isPersonProfileText } from '../data-transformers/helpers.js';
// ---------------------------------------------------------------------------
// Shared types
// ---------------------------------------------------------------------------
/**
 * Shorthand for a partial per-language string lookup.
 *
 * Keys are `Language` values and every key is optional, so consumers must
 * handle a missing entry for any given language.
 */
export type LangRecord = Partial<Record<Language, string>>;
// ---------------------------------------------------------------------------
// Document inspection helpers
// ---------------------------------------------------------------------------
/**
 * Resolve the lower-cased document-type key of a document.
 *
 * `doktyp` takes precedence; `documentType` is consulted only when `doktyp`
 * is empty or missing.
 *
 * @param doc - Document to inspect.
 * @returns The lower-cased type key, or an empty string when neither field is set.
 */
export function docType(doc: RawDocument): string {
  const rawType = doc.doktyp || doc.documentType;
  return rawType ? rawType.toLowerCase() : '';
}
/**
 * Extract a human-readable document title.
 *
 * Candidates are tried in order: `titel`, `title`, `dokumentnamn`, `dok_id`.
 * Each candidate is trimmed *before* it is tested, so a whitespace-only value
 * does not shadow a usable value further down the list.
 *
 * @param doc - Document to inspect.
 * @returns The first non-blank candidate with surrounding whitespace removed,
 *          or an empty string when every candidate is blank or missing.
 */
export function docTitle(doc: RawDocument): string {
  const candidates = [doc.titel, doc.title, doc.dokumentnamn, doc.dok_id];
  for (const candidate of candidates) {
    const trimmed = (candidate || '').trim();
    if (trimmed) {
      return trimmed;
    }
  }
  return '';
}
/**
 * Extract a stable document identifier.
 *
 * The first non-empty value among `dok_id`, `url`, `titel`, `title` and
 * `dokumentnamn` is returned unchanged. When none of them is set, a composite
 * `"<type>:<date-or-unknown>"` key is built so that unidentified documents of
 * different types or dates do not all collapse onto the literal `'unknown'`.
 *
 * @param doc - Document to inspect.
 * @returns The primary identifier, or the composite fallback key.
 */
export function docId(doc: RawDocument): string {
  const primaryId = doc.dok_id || doc.url || doc.titel || doc.title || doc.dokumentnamn;
  if (primaryId) {
    return primaryId;
  }
  // Every title-like field is empty at this point (they are all part of the
  // chain above), so only the document type and date can tell documents apart.
  const fallbackType = normalizedDocType(doc) || 'other';
  const fallbackTitle = doc.datum || 'unknown';
  return `${fallbackType}:${fallbackTitle}`;
}
/**
 * Decide whether a document is an SFS document (enacted law/statute).
 *
 * A document qualifies when its normalised type key equals `'sfs'`, or when
 * its `dokumentnamn` begins with the exact, case-sensitive prefix `'SFS'`.
 *
 * @param doc - Document to inspect.
 * @returns `true` when either criterion matches.
 */
export function isSfsDoc(doc: RawDocument): boolean {
  if (docType(doc) === 'sfs') {
    return true;
  }
  const name = doc.dokumentnamn || '';
  return name.startsWith('SFS');
}
/**
 * Normalize the document type key.
 *
 * Resolution order:
 * 1. a raw type of `'eu'` is reported as `'fpm'`;
 * 2. any other non-empty raw type is returned as-is (already lower-cased);
 * 3. a document without a type whose `dokumentnamn` starts with 'SFS' is
 *    reported as `'sfs'`;
 * 4. everything else is `'other'`.
 *
 * Reuse this everywhere a doc-type key is needed (mindmap, dashboard, confidence).
 *
 * @param doc - Document to inspect.
 * @returns A non-empty type key.
 */
export function normalizedDocType(doc: RawDocument): string {
  const raw = docType(doc);
  if (raw === 'eu') return 'fpm';
  if (raw) return raw;
  // The raw type is empty here, so only the SFS-by-name rule can still match.
  if (isSfsDoc(doc)) return 'sfs';
  return 'other';
}
/**
 * Predicate: the document's `contentFetched` flag is truthy.
 *
 * The flag is expected to be set by `enrichDocumentsWithContent()`.
 *
 * @param doc - Document to inspect.
 * @returns `true` when the document has been marked as enriched.
 */
export function isMetadataEnriched(doc: RawDocument): boolean {
  return !!doc.contentFetched;
}
/**
 * Predicate: the document was fetched and carries full-text or full-HTML content.
 *
 * @param doc - Document to inspect.
 * @returns `true` only when `contentFetched` is truthy and at least one of
 *          `fullText` / `fullContent` is non-empty.
 */
export function hasFullTextContent(doc: RawDocument): boolean {
  if (!doc.contentFetched) {
    return false;
  }
  return !!(doc.fullText || doc.fullContent);
}
/**
 * Extract a meaningful text passage from an enriched document.
 *
 * The text is taken from `fullText`, falling back to `fullContent`. Documents
 * typed `'mot'` whose text contains the marker 'Motion till riksdagen' are
 * passed through `cleanMotionText` before the passage is selected.
 *
 * @param doc - Document to read the text from.
 * @param maxChars - Length limit forwarded to `extractKeyPassage` (default 400).
 * @returns The selected passage, or `null` when the document has no text, the
 *          text is flagged by `isPersonProfileText`, or no passage is found.
 */
export function extractPassage(doc: RawDocument, maxChars = 400): string | null {
  const source = doc.fullText || doc.fullContent || '';
  if (!source) return null;
  if (isPersonProfileText(source)) return null;

  let text = source;
  if (docType(doc) === 'mot' && source.includes('Motion till riksdagen')) {
    text = cleanMotionText(source);
  }

  const passage = extractKeyPassage(text, maxChars);
  return passage || null;
}
|