All files / scripts/validators/article/rules banned-phrases.ts

95% Statements 57/60
85.18% Branches 23/27
100% Functions 5/5
96.22% Lines 51/53

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124                                              2x 2x                 10x 9x 9x 2x 2x 2x   7x 7x 7x 1x   5x 5x 5x 242x 241x 241x 240x 240x 238x 238x   5x     1x   7x 7x         16x 16x                     7x 7x 7x 133x 133x 131x 131x 131x 6x 6x 6x 6x 6x     7x             2x 2x 2x               2x 2x 2x 1x 1x           2x    
/**
 * @module scripts/validators/article/rules/banned-phrases
 * @description Banned-phrase scanner — load + cache the canonical
 *              `political-style-guide.json` list and scan article text
 *              for case-insensitive literal substring matches.
 *
 *              Rule census: extracted from
 *              `scripts/validate-article.ts` lines 315–386 (cache,
 *              `loadBannedPhrases`, `resetBannedPhrasesCache`,
 *              `scanBannedPhrases`). Logic is byte-identical to the
 *              original; the module-scope cache + reset API are
 *              preserved exactly because tests in
 *              `tests/validate-article.test.ts` depend on the behaviour.
 *
 * @author Hack23 AB
 * @license Apache-2.0
 */
 
import { existsSync, readFileSync } from 'node:fs';
import { join } from 'node:path';
 
import { REPO_ROOT } from '../types.js';
 
/**
 * Module-scope cache for the banned-phrase list. Holds the phrase array
 * produced by the last load attempt, or `null` when no usable list was
 * obtained (and also before any load has happened).
 */
let _bannedPhrasesCache: string[] | null = null;
/**
 * Set to `true` once a load attempt has finished — whether it produced a
 * list or `null` — so a `null` result is remembered instead of being retried.
 */
let _bannedPhrasesCacheLoaded = false;
 
/**
 * Read the banned-phrase list from
 * `<repoRoot>/analysis/methodologies/political-style-guide.json` and memoise
 * the outcome in module scope.
 *
 * Only string entries of the top-level `allPhrases` array are kept; each is
 * trimmed, blank entries are dropped, and case-insensitive duplicates are
 * collapsed to their first occurrence (original casing preserved).
 *
 * Note: once any load attempt has completed, later calls return the cached
 * outcome regardless of the `repoRoot` passed, until
 * `resetBannedPhrasesCache()` is called.
 *
 * @param repoRoot - Directory that contains the `analysis/` tree.
 * @returns The cleaned phrase list, or `null` when the file is missing,
 *          cannot be read or parsed, has no non-empty `allPhrases` array, or
 *          yields no usable phrase — so callers can report an explicit
 *          violation instead of silently skipping the check.
 */
export function loadBannedPhrases(repoRoot: string = REPO_ROOT): string[] | null {
  if (_bannedPhrasesCacheLoaded) {
    return _bannedPhrasesCache;
  }

  const styleGuidePath = join(repoRoot, 'analysis', 'methodologies', 'political-style-guide.json');
  // Stays null unless the file exists, parses, and yields at least one phrase.
  let loaded: string[] | null = null;

  if (existsSync(styleGuidePath)) {
    try {
      const parsed = JSON.parse(readFileSync(styleGuidePath, 'utf8')) as { allPhrases?: unknown };
      const candidates: unknown = parsed.allPhrases;
      if (Array.isArray(candidates)) {
        // Keyed by lowercase form; insertion order keeps the first spelling seen.
        const firstByKey = new Map<string, string>();
        for (const entry of candidates) {
          if (typeof entry !== 'string') continue;
          const phrase = entry.trim();
          if (phrase.length === 0) continue;
          const key = phrase.toLowerCase();
          if (!firstByKey.has(key)) firstByKey.set(key, phrase);
        }
        if (firstByKey.size > 0) {
          loaded = Array.from(firstByKey.values());
        }
      }
    } catch {
      // Unreadable file or malformed JSON: treated the same as a missing list.
      loaded = null;
    }
  }

  _bannedPhrasesCache = loaded;
  _bannedPhrasesCacheLoaded = true;
  return loaded;
}
 
/**
 * Forget any previously loaded banned-phrase list so that the next
 * `loadBannedPhrases` call reads the file again. Intended for tests.
 */
export function resetBannedPhrasesCache(): void {
  _bannedPhrasesCacheLoaded = false;
  _bannedPhrasesCache = null;
}
 
/**
 * Find every occurrence of each banned phrase in `text`, ignoring case and
 * treating phrases as literal substrings.
 *
 * Phrases are trimmed before matching and blank ones are ignored. After a
 * match the search resumes past its end, so overlapping occurrences of the
 * same phrase are not reported. Hits are grouped by phrase (in the order
 * given) and, within a phrase, ordered by position.
 *
 * Match offsets are located in a lowercased copy of `text` and then applied
 * to the original, which assumes lowercasing does not change string length.
 *
 * @param text - Article text to scan.
 * @param bannedPhrases - Literal phrases to look for.
 * @returns One entry per occurrence: the trimmed phrase plus a snippet of up
 *          to 20 characters either side of the match, with newlines replaced
 *          by spaces.
 */
export function scanBannedPhrases(
  text: string,
  bannedPhrases: string[],
): Array<{ phrase: string; context: string }> {
  const CONTEXT_PAD = 20;
  const haystack = text.toLowerCase();
  const found: Array<{ phrase: string; context: string }> = [];

  for (const candidate of bannedPhrases) {
    const phrase = candidate.trim();
    if (phrase === '') continue;

    const needle = phrase.toLowerCase();
    for (
      let at = haystack.indexOf(needle);
      at !== -1;
      at = haystack.indexOf(needle, at + needle.length)
    ) {
      const from = at > CONTEXT_PAD ? at - CONTEXT_PAD : 0;
      const to = Math.min(text.length, at + phrase.length + CONTEXT_PAD);
      const snippet = text.slice(from, to).replace(/\n/g, ' ');
      found.push({ phrase, context: snippet });
    }
  }

  return found;
}
 
import type { ArticleViolation } from '../types.js';
 
/**
 * Banned-phrase rule for a single article.
 *
 * Loads the banned-phrase list via `loadBannedPhrases()` (default repo root)
 * and scans `text` with `scanBannedPhrases`.
 *
 * @param rel - Article path to record in the `file` field of each violation.
 * @param text - Article text to scan.
 * @returns An empty array when the article is clean; a single
 *          `missing-banned-phrase-list` violation when the list cannot be
 *          loaded; otherwise a single `banned-phrase-detected` violation
 *          carrying the total hit count and up to three sample phrases.
 */
export function checkBannedPhrases(rel: string, text: string): ArticleViolation[] {
  const out: ArticleViolation[] = [];
  const bannedPhrases = loadBannedPhrases();
  // A missing list is reported explicitly so the check is never silently skipped.
  if (bannedPhrases === null) {
    out.push({
      file: rel,
      code: 'missing-banned-phrase-list',
      message: `Canonical banned-phrase file (analysis/methodologies/political-style-guide.json) is missing or malformed — editorial QA check cannot run. Ensure the file exists and contains a valid "allPhrases" array.`,
    });
    return out;
  }
  // Defensive guard: nothing to scan for.
  if (bannedPhrases.length === 0) return out;
  const hits = scanBannedPhrases(text, bannedPhrases);
  if (hits.length > 0) {
    // Quote at most three phrases; an ellipsis in the message signals that more exist.
    const sample = hits.slice(0, 3).map((h) => `"${h.phrase}"`).join(', ');
    out.push({
      file: rel,
      code: 'banned-phrase-detected',
      message: `Article contains ${hits.length} banned phrase(s) (${sample}${hits.length > 3 ? ', …' : ''}). Rewrite using evidence-anchored alternatives per political-style-guide.json (human-readable companion: political-style-guide.md).`,
    });
  }
  return out;
}