All files / scripts/generate-news-enhanced analysis-cache.ts

83.78% Statements 31/37
50% Branches 10/20
88.88% Functions 8/9
82.85% Lines 29/35

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143                              11x     11x                             8x 8x 148x 148x   8x                   20x                                     8x 10x   8x 8x 8x                   4x 4x 2x 1x 1x   1x                     5x   5x                 7x 7x 8x         7x                         1x         2x 2x         11x  
/**
 * @module generate-news-enhanced/analysis-cache
 * @description In-memory cache for intermediate AI analysis results during
 * multi-iteration news generation. Prevents redundant re-analysis within a
 * single generation run and supports TTL-based expiry for stale results.
 *
 * @author Hack23 AB
 * @license Apache-2.0
 */
 
import type { Language } from '../types/language.js';
import type { RawDocument } from '../data-transformers.js';
import type { AIAnalysisResult } from './ai-analysis-pipeline.js';
 
/**
 * Lifetime applied to a cached analysis result when the caller does not
 * supply one: 30 minutes, expressed in milliseconds.
 */
const DEFAULT_TTL_MS = 1000 * 60 * 30;

/**
 * Upper bound on the number of entries kept in the cache. Once exceeded,
 * expired entries are purged first and then the oldest remaining ones.
 */
const MAX_CACHE_SIZE = 500;
 
/** A stored analysis result plus the bookkeeping needed to expire it. */
interface CacheEntry {
  /** Epoch time in milliseconds (as returned by `Date.now()`) when the entry was stored. */
  createdAt: number;
  /** How long the entry stays valid, in milliseconds, counted from `createdAt`. */
  ttlMs: number;
  /** The cached analysis payload. */
  result: AIAnalysisResult;
}
 
/**
 * Hash a string with 32-bit FNV-1a and return the result in base 36.
 *
 * Each UTF-16 code unit of the input is XOR-ed into the running hash, which
 * is then multiplied by the FNV prime and truncated to an unsigned 32-bit
 * value. The output is deterministic but NOT cryptographically secure; it is
 * only meant for building compact cache keys.
 *
 * @param input - Text to hash
 * @returns Unsigned 32-bit hash rendered as a base-36 string
 */
function quickHash(input: string): string {
  const FNV_OFFSET_BASIS = 0x811c9dc5;
  const FNV_PRIME = 0x01000193;

  let hash = FNV_OFFSET_BASIS;
  let pos = 0;
  while (pos < input.length) {
    // Math.imul keeps the multiplication in 32-bit integer space; `>>> 0`
    // reinterprets the signed result as unsigned.
    hash = Math.imul(hash ^ input.charCodeAt(pos), FNV_PRIME) >>> 0;
    pos += 1;
  }
  return hash.toString(36);
}
 
/**
 * In-memory cache for AI analysis results.
 *
 * Entries are keyed by a hash of the analysis inputs (see `generateKey`) and
 * each carries its own TTL. Expired entries are dropped lazily on `get()` and
 * swept on every `set()` and `size` read; the total number of entries is
 * capped at `MAX_CACHE_SIZE`.
 *
 * Thread-safety note: Node.js is single-threaded and every method here is
 * synchronous, so concurrent async callers need no additional locking.
 */
export class AnalysisCache {
  private readonly store = new Map<string, CacheEntry>();

  /**
   * Generate a deterministic cache key from analysis inputs.
   *
   * The key is a 32-bit hash, so distinct inputs can in principle collide.
   *
   * @param docs - Documents being analysed
   * @param topic - Focus topic, or null
   * @param iterations - Number of analysis iterations
   * @param lang - Target language
   * @returns Cache key string
   */
  generateKey(
    docs: RawDocument[],
    topic: string | null,
    iterations: number,
    lang: Language,
  ): string {
    // Use stable per-doc identifiers (order-independent via sort) to reduce collision risk.
    // When dok_id is absent, include doktyp + datum alongside the title for uniqueness.
    const docIds = docs
      .map(d => d.dok_id ?? `${d.doktyp ?? ''}:${d.datum ?? ''}:${d.titel ?? d.title ?? ''}`)
      .sort();
    // Prefix with the document count so the doc list is length-delimited.
    const docPart = `${docIds.length}:${docIds.join(',')}`;
    const raw = `${docPart}|${topic ?? ''}|${iterations}|${lang}`;
    return quickHash(raw);
  }

  /**
   * Retrieve a cached analysis result if it exists and has not expired.
   * An expired entry is removed from the cache as a side effect.
   *
   * @param key - Cache key from generateKey()
   * @returns Cached result, or undefined if absent / expired
   */
  get(key: string): AIAnalysisResult | undefined {
    const entry = this.store.get(key);
    if (!entry) return undefined;
    if (Date.now() - entry.createdAt > entry.ttlMs) {
      this.store.delete(key);
      return undefined;
    }
    return entry.result;
  }

  /**
   * Store an analysis result in the cache, replacing any existing entry for
   * the same key and restarting its TTL.
   *
   * @param key - Cache key from generateKey()
   * @param result - Analysis result to cache
   * @param ttlMs - Time-to-live in milliseconds (default: 30 min)
   */
  set(key: string, result: AIAnalysisResult, ttlMs: number = DEFAULT_TTL_MS): void {
    this.store.set(key, { result, createdAt: Date.now(), ttlMs });
    // Opportunistically purge expired entries to prevent unbounded growth.
    this.purgeExpired();
  }

  /**
   * Remove all expired entries from the cache.
   * Called opportunistically from `set()` to prevent memory growth in
   * long-lived processes. Also enforces a maximum cache size.
   */
  purgeExpired(): void {
    const now = Date.now();
    // Deleting the entry currently being visited is safe while iterating a Map.
    for (const [k, entry] of this.store) {
      if (now - entry.createdAt > entry.ttlMs) {
        this.store.delete(k);
      }
    }
    // If still over the cap after purging expired entries, evict oldest first.
    if (this.store.size > MAX_CACHE_SIZE) {
      // Sort by createdAt rather than relying on Map insertion order:
      // overwriting an existing key refreshes createdAt but keeps the key's
      // original position in the Map.
      const sorted = [...this.store.entries()].sort(
        (a, b) => a[1].createdAt - b[1].createdAt,
      );
      const toRemove = sorted.slice(0, this.store.size - MAX_CACHE_SIZE);
      for (const [k] of toRemove) {
        this.store.delete(k);
      }
    }
  }

  /** Remove all entries from the cache. */
  clear(): void {
    this.store.clear();
  }

  /**
   * Number of live (non-expired) entries currently in the cache.
   * Reading this property purges expired entries first.
   */
  get size(): number {
    this.purgeExpired();
    return this.store.size;
  }
}
 
/**
 * Module-level singleton cache. Importing this instance, rather than
 * constructing a new `AnalysisCache`, lets all generators in a run share
 * cached analysis results.
 */
export const sharedAnalysisCache: AnalysisCache = new AnalysisCache();