All files / scripts/parliamentary-data/persistence/shared sanitize.ts

100% Statements 6/6
75% Branches 3/4
100% Functions 2/2
100% Lines 5/5

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40                                  188x                           34x 34x 34x       34x    
/**
 * @module parliamentary-data/persistence/shared/sanitize
 * @description Filename / path-segment sanitisation shared across all
 * persistence helpers. Extracted from the original `data-persistence.ts`
 * monolith as part of the >600-line refactor (issue #2579).
 *
 * @author Hack23 AB
 * @license Apache-2.0
 */
 
/**
 * Sanitize a Riksdag document identifier for safe use as a filename.
 *
 * Steps, in order:
 *  1. lowercase the input;
 *  2. replace every character other than `a-z`, `0-9`, `å`, `ä`, `ö` and `-`
 *     with a hyphen;
 *  3. collapse runs of hyphens into a single hyphen;
 *  4. strip one leading and one trailing hyphen;
 *  5. cap the result at 100 characters;
 *  6. strip a trailing hyphen that the cap may have exposed.
 *
 * @param dokId - Raw document identifier.
 * @returns The sanitised identifier, at most 100 characters long, never
 * starting or ending with a hyphen. May be the empty string when the input
 * contains no permitted characters.
 */
export function sanitizeDokId(dokId: string): string {
  return dokId
    .toLowerCase()
    .replace(/[^a-z0-9åäö-]/g, '-')
    .replace(/-+/g, '-')
    .replace(/^-|-$/g, '')
    .slice(0, 100)
    // Truncation can cut immediately after a hyphen; runs are already
    // collapsed, so at most one trailing hyphen can remain here.
    .replace(/-$/, '');
}
 
/**
 * Sanitize a path segment to prevent path traversal.
 *
 * Underscores are preserved (common in MCP tool names like
 * `get_voting_group`). Every character outside `a-z`, `A-Z`, `0-9`, `_`, `-`
 * and `åäöÅÄÖ` — including slashes, backslashes, colons, null bytes and
 * dots — is replaced with `_`. Runs of underscores are collapsed, one leading
 * and one trailing underscore are stripped, and the result is capped at 100
 * characters.
 *
 * @param segment - Raw path segment.
 * @returns A non-empty segment of at most 100 characters that never starts or
 * ends with an underscore. A dots-only input (`.`, `..`, ...) yields `dots`;
 * an input with no permitted characters yields `unknown`.
 */
export function sanitizePathSegment(segment: string): string {
  // A dots-only segment would otherwise collapse to nothing; map it to a
  // recognisable placeholder instead of the generic fallback.
  const source = /^\.+$/.test(segment) ? '_dots_' : segment;
  const safe = source
    // Whitelist pass: separators, colons and null bytes are outside the
    // allowed set, so no separate blacklist pass is needed.
    .replace(/[^a-zA-Z0-9_\-åäöÅÄÖ]/g, '_')
    .replace(/_+/g, '_')
    .replace(/^_|_$/g, '')
    .slice(0, 100)
    // Truncation can cut immediately after an underscore; runs are already
    // collapsed, so at most one trailing underscore can remain here.
    .replace(/_$/, '');
  return safe || 'unknown';
}