generate-news-enhanced analysis-cache.ts

10.81% Statements 4/37
0% Branches 0/20
0% Functions 0/9
11.42% Lines 4/35
Press n or j to go to the next uncovered block, b, p or k for the previous block.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143  
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12x
 
 
12x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12x
  /**
 * @module generate-news-enhanced/analysis-cache
 * @description In-memory cache for intermediate AI analysis results during
 * multi-iteration news generation. Prevents redundant re-analysis within a
 * single generation run and supports TTL-based expiry for stale results.
 *
 * @author Hack23 AB
 * @license Apache-2.0
 */
 
import type { Language } from '../types/language.js';
import type { RawDocument } from '../data-transformers.js';
import type { AIAnalysisResult } from './ai-analysis-pipeline.js';
 
/**
 * Lifetime applied to a cached analysis result when `set()` is called
 * without an explicit TTL: 30 minutes, expressed in milliseconds.
 */
const DEFAULT_TTL_MS = 1000 * 60 * 30;

/**
 * Upper bound on the number of entries kept in the cache. When the store is
 * still above this count after expired entries have been purged, the oldest
 * entries are evicted.
 */
const MAX_CACHE_SIZE = 500;
 
/** One stored analysis result together with the bookkeeping needed to expire it. */
interface CacheEntry {
  /** Timestamp taken from `Date.now()` when the entry was stored (epoch milliseconds). */
  createdAt: number;
  /** How long after `createdAt` the entry stays valid, in milliseconds. */
  ttlMs: number;
  /** The cached analysis output handed back by `AnalysisCache.get()`. */
  result: AIAnalysisResult;
}
 
/**
 * Hash a string with 32-bit FNV-1a and return the digest in base 36.
 *
 * Each UTF-16 code unit of `input` is xor-ed into the running hash, which is
 * then multiplied by the FNV prime and truncated to an unsigned 32-bit value.
 * The result is deterministic but NOT cryptographically secure; it is only
 * meant for deriving cache keys.
 *
 * @param input - Text to hash; an empty string yields the offset basis
 * @returns Unsigned 32-bit hash rendered with `toString(36)`
 */
function quickHash(input: string): string {
  const FNV_OFFSET_BASIS = 0x811c9dc5;
  const FNV_PRIME = 0x01000193;

  let hash = FNV_OFFSET_BASIS;
  let index = 0;
  while (index < input.length) {
    // xor first, multiply second (the "1a" ordering); `>>> 0` keeps the value unsigned.
    hash = Math.imul(hash ^ input.charCodeAt(index), FNV_PRIME) >>> 0;
    index += 1;
  }
  return hash.toString(36);
}
 
/**
 * In-memory cache of AI analysis results with per-entry expiry.
 *
 * Entries live in a `Map` keyed by the string returned from `generateKey()`.
 * Each entry records when it was stored and its own TTL; expired entries are
 * dropped on lookup and swept on every `set()` call and every `size` read.
 *
 * Concurrency note: every method is synchronous, so calls issued from
 * concurrent async code never interleave and no locking is used.
 */
export class AnalysisCache {
  private readonly store = new Map<string, CacheEntry>();

  /**
   * Build a deterministic cache key for one set of analysis inputs.
   *
   * Every document contributes its `dok_id`, or, when that is null or
   * undefined, a `doktyp:datum:title` composite. The identifiers are sorted,
   * so the order of `docs` does not influence the key, and are prefixed with
   * their count before being combined with the remaining inputs and hashed.
   *
   * @param docs - Documents being analysed
   * @param topic - Focus topic; `null` is keyed the same as an empty string
   * @param iterations - Number of analysis iterations
   * @param lang - Target language
   * @returns Hash of the combined inputs, as produced by `quickHash()`
   */
  generateKey(
    docs: RawDocument[],
    topic: string | null,
    iterations: number,
    lang: Language,
  ): string {
    const identifiers = docs.map((doc) => {
      if (doc.dok_id != null) {
        return doc.dok_id;
      }
      // No document id: fall back to type + date + title to keep entries apart.
      return `${doc.doktyp ?? ''}:${doc.datum ?? ''}:${doc.titel ?? doc.title ?? ''}`;
    });
    identifiers.sort();

    return quickHash(
      `${identifiers.length}:${identifiers.join(',')}|${topic ?? ''}|${iterations}|${lang}`,
    );
  }

  /**
   * Look up a cached analysis result.
   *
   * An entry whose age exceeds its TTL is removed from the store as a side
   * effect and reported as a miss.
   *
   * @param key - Cache key from generateKey()
   * @returns The cached result, or `undefined` when the key is absent or expired
   */
  get(key: string): AIAnalysisResult | undefined {
    const cached = this.store.get(key);
    if (cached === undefined) {
      return undefined;
    }

    const isExpired = Date.now() - cached.createdAt > cached.ttlMs;
    if (!isExpired) {
      return cached.result;
    }

    this.store.delete(key);
    return undefined;
  }

  /**
   * Store an analysis result under `key`, replacing any existing entry.
   *
   * @param key - Cache key from generateKey()
   * @param result - Analysis result to cache
   * @param ttlMs - Time-to-live in milliseconds (default: `DEFAULT_TTL_MS`)
   */
  set(key: string, result: AIAnalysisResult, ttlMs: number = DEFAULT_TTL_MS): void {
    const entry: CacheEntry = { result, createdAt: Date.now(), ttlMs };
    this.store.set(key, entry);
    // Sweep on every write so the store cannot grow without bound.
    this.purgeExpired();
  }

  /**
   * Drop every expired entry, then enforce the `MAX_CACHE_SIZE` cap.
   *
   * Runs on each `set()` call and each `size` read. If the store is still
   * over the cap once expired entries are gone, the entries with the
   * smallest `createdAt` are evicted until the cap is met.
   */
  purgeExpired(): void {
    const now = Date.now();
    this.store.forEach((entry, key) => {
      if (now - entry.createdAt > entry.ttlMs) {
        this.store.delete(key);
      }
    });

    const overflow = this.store.size - MAX_CACHE_SIZE;
    if (overflow <= 0) {
      return;
    }

    // Oldest entries first; the sort is stable, so ties keep insertion order.
    const oldestFirst = Array.from(this.store).sort(
      ([, left], [, right]) => left.createdAt - right.createdAt,
    );
    for (const [key] of oldestFirst.slice(0, overflow)) {
      this.store.delete(key);
    }
  }

  /** Empty the cache, discarding every entry regardless of expiry. */
  clear(): void {
    this.store.clear();
  }

  /**
   * Number of entries left in the cache after a purge.
   * Reading this property runs `purgeExpired()` first, so it mutates the store.
   */
  get size(): number {
    this.purgeExpired();
    return this.store.size;
  }
}
 
/**
 * Module-level singleton cache, constructed once when this module is first
 * evaluated. Intended to be shared across all generators in a run, so results
 * cached by one caller are visible to every other importer of this module.
 */
export const sharedAnalysisCache = new AnalysisCache();