All files / scripts/parliamentary-data/persistence/shared strip-metadata.ts

100% Statements 9/9
100% Branches 6/6
100% Functions 1/1
100% Lines 8/8

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43                                    25x                           109x 109x 109x 327x 21x 21x     109x    
/**
 * @module parliamentary-data/persistence/shared/strip-metadata
 * @description Strip in-memory MCP coverage annotations from raw documents
 * before persisting. Keeps `analysis/data/` byte-identical across parallel
 * workflows. Extracted from the original `data-persistence.ts` monolith.
 *
 * @author Hack23 AB
 * @license Apache-2.0
 */
 
import type { RawDocument } from '../../../data-transformers/types.js';
 
/**
 * Names of the properties that the in-memory pipeline attaches to documents
 * for the manifest/coverage layer. These must never reach the raw
 * `analysis/data/` files: writing them out would break byte-identical output
 * across parallel workflows, because each run carries a fresh `retrievedAt`.
 *
 * Iteration order follows insertion order of the entries listed here.
 */
export const STRIPPED_METADATA_FIELDS: Set<string> = new Set([
  'mcpCoverageState',
  'mcpProvenance',
  'mcpSignals',
]);
 
/**
 * Remove in-memory MCP coverage metadata from `doc` without mutating it.
 *
 * Coverage state and provenance live in the manifest and sidecar `.meta.json`,
 * not in the raw persisted document, so stripping them here keeps the data
 * files byte-identical regardless of how the in-memory record was decorated
 * upstream.
 *
 * @param doc - Document to sanitise; it is never modified.
 * @returns `doc` itself (same reference) when it carries none of the
 *   {@link STRIPPED_METADATA_FIELDS}; otherwise a shallow copy of `doc` with
 *   those fields deleted.
 */
export function stripInMemoryCoverageMetadata(doc: RawDocument): RawDocument {
  const source = doc as Record<string, unknown>;

  // Work out which of the forbidden fields are actually present on the input.
  const present = [...STRIPPED_METADATA_FIELDS].filter((name) => name in source);

  // Nothing to remove: hand back the original object and skip the copy.
  if (present.length === 0) {
    return doc;
  }

  // Copy once, then drop every offending field from the copy only.
  const sanitized: Record<string, unknown> = { ...source };
  for (const name of present) {
    delete sanitized[name];
  }
  return sanitized as RawDocument;
}