rules citation-density.ts

100% Statements 41/41
88.88% Branches 16/18
100% Functions 4/4
100% Lines 38/38
Press n or j to go to the next uncovered block, b, p or k for the previous block.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110  
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14x
14x
14x
14x
14x
14x
13x
13x
 
14x
14x
14x
14x
846x
14x
 
 
 
 
 
 
 
3x
3x
1x
1x
 
 
 
 
 
 
 
 
 
 
 
 
 
7x
7x
7x
1x
 
 
 
 
 
 
 
6x
5x
5x
5x
3x
3x
 
 
 
 
 
3x
3x
 
 
3x
3x
3x
3x
2x
 
 
 
 
 
 
 
5x
2x
 
 
 
 
 
 
 
3x
 
  /**
 * @module scripts/validators/article/rules/citation-density
 * @description Citation-density helpers — `countWords` (markdown-aware
 *              token counter) and `computeCitationDensity`
 *              (words per evidence anchor).
 *
 *              Rule census: extracted from
 *              `scripts/validate-article.ts` lines 410–440
 *              (`countWords`, `computeCitationDensity`). Logic is
 *              byte-identical to the original.
 *
 * @author Hack23 AB
 * @license Apache-2.0
 */
 
import { countArticleEvidenceAnchors } from './evidence-anchors.js';
 
/**
 * Count the prose words in a markdown document.
 *
 * Markdown/HTML scaffolding is removed first, then the remainder is split on
 * whitespace and the non-empty tokens are counted. The removal steps run in a
 * fixed order because later patterns assume earlier ones already ran:
 *
 *  1. fenced code blocks, inline code spans and image embeds are dropped;
 *  2. HTML tags are stripped repeatedly until the text stops changing;
 *  3. table separator rows are dropped and remaining pipes become spaces;
 *  4. a single leading `>`, `#`, `+`, `*` or `-` marker is stripped per line;
 *  5. links `[label](target)` are reduced to their label.
 *
 * @param text - Raw markdown text.
 * @returns Number of whitespace-delimited tokens left after cleaning.
 */
export function countWords(text: string): number {
  // Code and images contribute no prose, so they go first.
  let body = text
    .replace(/```[^\n]*\n[\s\S]*?```/g, '')
    .replace(/`[^`]+`/g, '')
    .replace(/!\[[^\]]*\]\([^)]*\)/g, '');

  // Strip tags to a fixed point: one pass may expose text that forms a new tag.
  for (;;) {
    const stripped = body.replace(/<[^>]+>/g, '');
    if (stripped === body) break;
    body = stripped;
  }

  body = body
    .replace(/^\s*\|[\s:|-]+\|\s*$/gm, '') // table separator rows such as |---|:--|
    .replace(/\|/g, ' ') // remaining cell delimiters
    .replace(/^\s*[>#+*-]\s*/gm, '') // one leading quote/heading/list marker per line
    .replace(/\[([^\]]*)\]\([^)]*\)/g, '$1'); // keep only the link label

  let total = 0;
  for (const token of body.split(/\s+/)) {
    if (token.length > 0) total += 1;
  }
  return total;
}
 
/**
 * Words-per-evidence-anchor ratio for an article; a smaller value means the
 * text is more densely cited.
 *
 * @param text - Raw article text.
 * @returns `countWords(text)` divided by the anchor count reported by
 *          `countArticleEvidenceAnchors`, or `Infinity` when that count is 0.
 */
export function computeCitationDensity(text: string): number {
  const anchorCount = countArticleEvidenceAnchors(text);
  // With no anchors the ratio is undefined; report it as unbounded and skip the word count.
  return anchorCount === 0 ? Infinity : countWords(text) / anchorCount;
}
 
import { existsSync, readFileSync } from 'node:fs';
import { join } from 'node:path';
import type { ArticleViolation } from '../types.js';
import { REPO_ROOT } from '../types.js';
 
/**
 * Resolve the maximum allowed words-per-anchor for an article type.
 *
 * Reads `analysis/methodologies/reference-quality-thresholds.json` under
 * `REPO_ROOT` and returns `aiFirst.citationDensity.perArticle[subfolderName]`
 * when that entry is a number. In every other case (empty `subfolderName`,
 * missing file, missing or non-numeric entry, read or parse failure) the
 * default of 200 is returned.
 */
function resolveDensityThreshold(subfolderName: string): number {
  const fallback = 200;
  if (!subfolderName) return fallback;
  try {
    const thresholdsPath = join(
      REPO_ROOT,
      'analysis',
      'methodologies',
      'reference-quality-thresholds.json',
    );
    if (!existsSync(thresholdsPath)) return fallback;
    const thresholds = JSON.parse(readFileSync(thresholdsPath, 'utf8')) as {
      aiFirst?: { citationDensity?: { perArticle?: Record<string, number | string> } };
    };
    const perArticle = thresholds.aiFirst?.citationDensity?.perArticle;
    if (!perArticle) return fallback;
    const typeThreshold = perArticle[subfolderName];
    // String-valued entries are ignored; only numeric overrides apply.
    return typeof typeThreshold === 'number' ? typeThreshold : fallback;
  } catch {
    // Deliberate best-effort: an unreadable or malformed thresholds file
    // falls back to the default threshold instead of failing validation.
    return fallback;
  }
}

/**
 * Low-citation-density rule. Uses `reference-quality-thresholds.json`
 * per-article overrides.
 *
 * @param rel - Article path, copied into the `file` field of each violation.
 * @param text - Raw article text to measure.
 * @param subfolderName - Article type key used to look up a per-article
 *        threshold override; when empty the default threshold is used.
 * @returns A single `low-citation-density` violation when the article has
 *          words but no evidence anchors, or when its words-per-anchor ratio
 *          exceeds the threshold; otherwise an empty array.
 */
export function checkCitationDensity(
  rel: string,
  text: string,
  subfolderName: string,
): ArticleViolation[] {
  const wordCount = countWords(text);
  const anchors = countArticleEvidenceAnchors(text);
  if (wordCount > 0 && anchors === 0) {
    return [
      {
        file: rel,
        code: 'low-citation-density',
        message: `Article has ${wordCount} words but zero verifiable evidence anchors. Add dok_id references, vote IDs, or primary-source URLs.`,
      },
    ];
  }
  // No words and no anchors: nothing to measure.
  if (anchors === 0) return [];
  const density = wordCount / anchors;
  const threshold = resolveDensityThreshold(subfolderName);
  if (density > threshold) {
    return [
      {
        file: rel,
        code: 'low-citation-density',
        message: `Citation density is ${Math.round(density)} words/anchor — maximum allowed is ${threshold} (for article type "${subfolderName}"). Add more evidence anchors (dok_id, vote IDs, primary-source URLs) to meet the editorial floor.`,
      },
    ];
  }
  return [];
}