ai-analysis helpers.ts

91.66% Statements 22/24
70.21% Branches 33/47
100% Functions 8/8
90% Lines 18/20
Press n or j to go to the next uncovered block, b, p or k for the previous block.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94  
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7913x
 
 
 
 
669x
 
 
 
 
859x
859x
859x
 
 
 
859x
859x
 
 
 
 
 
 
 
675x
 
 
 
 
 
 
 
 
2241x
2241x
2202x
7x
 
 
 
 
 
 
 
1783x
 
 
 
 
 
 
928x
 
 
 
 
604x
604x
9x
 
 
604x
 
  /**
 * @module ai-analysis/helpers
 * @description Shared document utility functions for the AI analysis pipeline.
 *
 * Provides low-level document inspection helpers used across all bounded
 * contexts (SWOT, domains, visualisation) to avoid duplication and ensure
 * consistent document-type normalisation.
 *
 * @author Hack23 AB
 * @license Apache-2.0
 */
 
import type { Language } from '../types/language.js';
import type { RawDocument } from '../data-transformers/types.js';
import { extractKeyPassage, cleanMotionText, isPersonProfileText } from '../data-transformers/helpers.js';
 
// ---------------------------------------------------------------------------
// Shared types
// ---------------------------------------------------------------------------
 
/** Shorthand for a partial language lookup record. */
export type LangRecord = Partial<Record<Language, string>>;
 
// ---------------------------------------------------------------------------
// Document inspection helpers
// ---------------------------------------------------------------------------
 
/**
 * Return the document's type key in lower case.
 *
 * Reads `doktyp` first and falls back to `documentType`; when neither holds a
 * truthy value the result is the empty string.
 *
 * @param doc - Document to inspect.
 * @returns Lower-cased type key, or `''` when no type is set.
 */
export function docType(doc: RawDocument): string {
  const rawType = doc.doktyp || doc.documentType;
  return rawType ? rawType.toLowerCase() : '';
}
 
/**
 * Return a trimmed, human-readable title for the document.
 *
 * The first truthy value among `titel`, `title`, `dokumentnamn` and `dok_id`
 * is used. Trimming happens after the choice is made, so a whitespace-only
 * `titel` yields `''` rather than falling through to the next field.
 *
 * @param doc - Document to inspect.
 * @returns Trimmed title, or `''` when none of the fields is set.
 */
export function docTitle(doc: RawDocument): string {
  const candidate = doc.titel || doc.title || doc.dokumentnamn || doc.dok_id;
  return candidate ? candidate.trim() : '';
}
 
/**
 * Return a stable identifier for the document.
 *
 * The first truthy value among `dok_id`, `url`, `titel`, `title` and
 * `dokumentnamn` is returned unchanged (no trimming). When none of them is
 * set, a deterministic composite `"<type>:<label>"` is built instead so that
 * unidentified documents do not all collide on a single literal.
 *
 * @param doc - Document to inspect.
 * @returns The primary identifier, or the composite fallback key.
 */
export function docId(doc: RawDocument): string {
  const primaryId = doc.dok_id || doc.url || doc.titel || doc.title || doc.dokumentnamn;
  if (primaryId) {
    return primaryId;
  }
  // Deterministic composite fallback to avoid collisions on literal 'unknown'.
  const fallbackType = normalizedDocType(doc) || 'other';
  // On this path every field docTitle() reads is already falsy, so the label
  // is effectively `doc.datum`, then 'unknown'; the call is kept as a guard.
  const fallbackTitle = docTitle(doc) || doc.datum || 'unknown';
  return `${fallbackType}:${fallbackTitle}`;
}
 
/**
 * Tell whether a document is an SFS (enacted law/statute) document.
 *
 * A document qualifies when its type key from `docType()` equals `'sfs'`, or
 * when its `dokumentnamn` starts with the case-sensitive prefix `'SFS'`.
 *
 * @param doc - Document to inspect.
 * @returns `true` when either condition holds.
 */
export function isSfsDoc(doc: RawDocument): boolean {
  if (docType(doc) === 'sfs') {
    return true;
  }
  const name = doc.dokumentnamn || '';
  return name.startsWith('SFS');
}
 
/**
 * Normalize the document type key.
 *
 * Resolution order:
 * 1. a raw type of `'eu'` is mapped to `'fpm'`;
 * 2. any other non-empty raw type is returned as-is (already lower-cased by
 *    `docType()`);
 * 3. a document without a type whose `dokumentnamn` starts with `'SFS'` is
 *    reported as `'sfs'`;
 * 4. everything else is `'other'`.
 *
 * Reuse this everywhere a doc-type key is needed (mindmap, dashboard, confidence).
 *
 * @param doc - Document to inspect.
 * @returns A non-empty type key.
 */
export function normalizedDocType(doc: RawDocument): string {
  const raw = docType(doc);
  if (raw === 'eu') return 'fpm';
  if (raw) return raw;
  // No explicit type: only the SFS-by-name check can still classify the doc.
  if (isSfsDoc(doc)) return 'sfs';
  return 'other';
}
 
/**
 * Predicate: the document carries a truthy `contentFetched` flag.
 *
 * The flag is expected to be set by `enrichDocumentsWithContent()`; that
 * function is not part of this module.
 *
 * @param doc - Document to inspect.
 * @returns `true` when `contentFetched` is truthy.
 */
export function isMetadataEnriched(doc: RawDocument): boolean {
  return !!doc.contentFetched;
}
 
/**
 * Predicate: the document was fetched and carries body content.
 *
 * Requires a truthy `contentFetched` flag and a truthy `fullText` or
 * `fullContent` value.
 *
 * @param doc - Document to inspect.
 * @returns `true` only when both conditions hold.
 */
export function hasFullTextContent(doc: RawDocument): boolean {
  if (!doc.contentFetched) {
    return false;
  }
  return Boolean(doc.fullText || doc.fullContent);
}
 
/**
 * Extract a text passage from the document's body content.
 *
 * Uses `fullText`, falling back to `fullContent`. Text that
 * `isPersonProfileText()` flags is rejected. For documents typed `'mot'`
 * whose text contains the marker `'Motion till riksdagen'`, the text is first
 * passed through `cleanMotionText()`. The passage itself comes from
 * `extractKeyPassage()`.
 *
 * @param doc - Document to read content from.
 * @param maxChars - Length budget forwarded to `extractKeyPassage()` (default 400).
 * @returns The passage, or `null` when there is no content, the content is
 *   flagged as a person profile, or the extractor returns a falsy value.
 */
export function extractPassage(doc: RawDocument, maxChars = 400): string | null {
  const source = doc.fullText || doc.fullContent || '';
  if (!source) return null;
  if (isPersonProfileText(source)) return null;

  let text = source;
  if (docType(doc) === 'mot' && source.includes('Motion till riksdagen')) {
    text = cleanMotionText(source);
  }

  const passage = extractKeyPassage(text, maxChars);
  return passage || null;
}