All files / scripts/ai-analysis helpers.ts

91.66% Statements 22/24
70.21% Branches 33/47
100% Functions 8/8
90% Lines 18/20

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94                                                          7913x         669x         859x 859x 859x       859x 859x               675x                 2241x 2241x 2202x 7x               1783x             928x         604x 604x 9x     604x    
/**
 * @module ai-analysis/helpers
 * @description Shared document utility functions for the AI analysis pipeline.
 *
 * Provides low-level document inspection helpers used across all bounded
 * contexts (SWOT, domains, visualisation) to avoid duplication and ensure
 * consistent document-type normalisation.
 *
 * @author Hack23 AB
 * @license Apache-2.0
 */
 
import type { Language } from '../types/language.js';
import type { RawDocument } from '../data-transformers/types.js';
import { extractKeyPassage, cleanMotionText, isPersonProfileText } from '../data-transformers/helpers.js';
 
// ---------------------------------------------------------------------------
// Shared types
// ---------------------------------------------------------------------------
 
/**
 * Shorthand for a partial language lookup record: an object that maps any
 * subset of `Language` keys to a string value. Every key is optional.
 */
export type LangRecord = Partial<Record<Language, string>>;
 
// ---------------------------------------------------------------------------
// Document inspection helpers
// ---------------------------------------------------------------------------
 
/**
 * Return the document's type key in lower case.
 *
 * Reads `doktyp` first and falls back to `documentType`. Yields an empty
 * string when neither field holds a truthy value.
 *
 * @param doc - Document to inspect.
 * @returns Lower-cased type key, or `''` if the document carries no type.
 */
export function docType(doc: RawDocument): string {
  const rawType = doc.doktyp || doc.documentType;
  return rawType ? rawType.toLowerCase() : '';
}
 
/**
 * Return a human-readable title for the document.
 *
 * Picks the first truthy value among `titel`, `title`, `dokumentnamn` and
 * `dok_id` (in that order) and trims surrounding whitespace. The trim happens
 * after selection, so a whitespace-only first match yields `''` rather than
 * falling through to the next field.
 *
 * @param doc - Document to inspect.
 * @returns Trimmed title, or `''` when none of the fields is set.
 */
export function docTitle(doc: RawDocument): string {
  const candidates = [doc.titel, doc.title, doc.dokumentnamn, doc.dok_id];
  const firstSet = candidates.find(Boolean);
  return (firstSet || '').trim();
}
 
/**
 * Extract a stable document identifier.
 *
 * Returns the first truthy value among `dok_id`, `url`, `titel`, `title` and
 * `dokumentnamn`. When none is set, builds a deterministic composite of the
 * form `<type>:<datum>` so that unrelated documents do not all collapse onto
 * one literal `'unknown'` id.
 *
 * @param doc - Document to identify.
 * @returns The primary identifier, or the `<type>:<datum | 'unknown'>` fallback.
 */
export function docId(doc: RawDocument): string {
  const primaryId = doc.dok_id || doc.url || doc.titel || doc.title || doc.dokumentnamn;
  if (primaryId) {
    return primaryId;
  }
  // Deterministic composite fallback to avoid collisions on literal 'unknown'.
  const fallbackType = normalizedDocType(doc) || 'other';
  // Every title-like field (titel, title, dokumentnamn, dok_id) is already
  // known to be falsy on this path, so the date is the only remaining
  // discriminator available.
  const fallbackKey = doc.datum || 'unknown';
  return `${fallbackType}:${fallbackKey}`;
}
 
/**
 * Test whether a document is an SFS (enacted law/statute).
 *
 * A document qualifies when either condition holds:
 * - its lower-cased type key (see `docType`) equals `'sfs'`, or
 * - its `dokumentnamn` starts with the case-sensitive prefix `'SFS'`.
 *
 * @param doc - Document to inspect.
 * @returns `true` if the document is recognised as SFS by type or by name.
 */
export function isSfsDoc(doc: RawDocument): boolean {
  if (docType(doc) === 'sfs') {
    return true;
  }
  const name = doc.dokumentnamn || '';
  return name.startsWith('SFS');
}
 
/**
 * Normalize the document type key.
 *
 * Resolution order:
 * 1. a raw type of `'eu'` is reported as `'fpm'`;
 * 2. any other non-empty raw type is returned unchanged (already lower-cased
 *    by `docType`);
 * 3. documents with no type but a `dokumentnamn` starting with 'SFS' are
 *    reported as `'sfs'`;
 * 4. everything else is `'other'`.
 *
 * Reuse this everywhere a doc-type key is needed (mindmap, dashboard, confidence).
 *
 * @param doc - Document to classify.
 * @returns A non-empty, lower-case type key.
 */
export function normalizedDocType(doc: RawDocument): string {
  const raw = docType(doc);
  if (raw === 'eu') return 'fpm';
  if (raw) return raw;
  // No explicit type: fall back to recognising SFS documents by name.
  if (isSfsDoc(doc)) return 'sfs';
  return 'other';
}
 
/**
 * Predicate: the document's `contentFetched` flag is truthy, i.e. it has been
 * through `enrichDocumentsWithContent()`.
 *
 * @param doc - Document to inspect.
 * @returns `true` when `contentFetched` is truthy, otherwise `false`.
 */
export function isMetadataEnriched(doc: RawDocument): boolean {
  return !!doc.contentFetched;
}
 
/**
 * Predicate: the document was fetched and carries body content.
 *
 * Requires a truthy `contentFetched` flag and a truthy value in at least one
 * of `fullText` or `fullContent`.
 *
 * @param doc - Document to inspect.
 * @returns `true` only when both conditions hold.
 */
export function hasFullTextContent(doc: RawDocument): boolean {
  if (!doc.contentFetched) {
    return false;
  }
  return !!(doc.fullText || doc.fullContent);
}
 
/**
 * Extract a meaningful text passage from an enriched document.
 *
 * Uses `fullText`, falling back to `fullContent`. Returns `null` when there
 * is no text, when `isPersonProfileText` flags the text, or when
 * `extractKeyPassage` yields nothing. Documents of type `'mot'` whose text
 * contains the marker 'Motion till riksdagen' are passed through
 * `cleanMotionText` before extraction.
 *
 * @param doc - Document to read text from.
 * @param maxChars - Length limit forwarded to `extractKeyPassage` (default 400).
 * @returns The extracted passage, or `null` if none is available.
 */
export function extractPassage(doc: RawDocument, maxChars = 400): string | null {
  const source = doc.fullText || doc.fullContent || '';
  if (!source) {
    return null;
  }
  if (isPersonProfileText(source)) {
    return null;
  }

  let text = source;
  if (docType(doc) === 'mot' && source.includes('Motion till riksdagen')) {
    text = cleanMotionText(source);
  }

  const passage = extractKeyPassage(text, maxChars);
  return passage ? passage : null;
}