/**
* @module scripts/validators/article/rules/citation-density
* @description Citation-density helpers — `countWords` (markdown-aware
* token counter) and `computeCitationDensity`
* (words per evidence anchor).
*
* Rule census: extracted from
* `scripts/validate-article.ts` lines 410–440
* (`countWords`, `computeCitationDensity`). Logic is
* byte-identical to the original.
*
* @author Hack23 AB
* @license Apache-2.0
*/
import { countArticleEvidenceAnchors } from './evidence-anchors.js';
/**
 * Approximate the number of prose words in a markdown document.
 *
 * Before counting, the text is reduced in this order:
 *  1. fenced code blocks, inline code spans and image syntax are dropped;
 *  2. `<...>` tag-like runs are removed repeatedly until nothing changes;
 *  3. table separator rows are dropped and remaining pipes become spaces;
 *  4. one leading `>`, `#`, `+`, `*` or `-` marker per line is removed;
 *  5. `[label](target)` links are reduced to their label.
 *
 * @param text - Raw markdown source.
 * @returns Count of non-empty whitespace-separated tokens left after cleanup.
 */
export function countWords(text: string): number {
  // Code and images contribute no prose words.
  const withoutCodeOrImages = text
    .replace(/```[^\n]*\n[\s\S]*?```/g, '')
    .replace(/`[^`]+`/g, '')
    .replace(/!\[[^\]]*\]\([^)]*\)/g, '');

  // Removing a tag can splice a new tag together from its neighbours, so
  // keep stripping until a pass leaves the string unchanged.
  let untagged = withoutCodeOrImages;
  for (;;) {
    const pass = untagged.replace(/<[^>]+>/g, '');
    if (pass === untagged) break;
    untagged = pass;
  }

  const prose = untagged
    .replace(/^\s*\|[\s:|-]+\|\s*$/gm, '')
    .replace(/\|/g, ' ')
    .replace(/^\s*[>#+*-]\s*/gm, '')
    .replace(/\[([^\]]*)\]\([^)]*\)/g, '$1');

  let total = 0;
  for (const token of prose.split(/\s+/)) {
    if (token.length > 0) total += 1;
  }
  return total;
}
/**
 * Words-per-anchor ratio for an article; a smaller value means denser citations.
 *
 * @param text - Article text.
 * @returns `countWords(text)` divided by the evidence-anchor count, or
 *   `Infinity` when the anchor count is zero (the word count is not computed
 *   in that case).
 */
export function computeCitationDensity(text: string): number {
  const anchorCount = countArticleEvidenceAnchors(text);
  return anchorCount === 0 ? Infinity : countWords(text) / anchorCount;
}
import { existsSync, readFileSync } from 'node:fs';
import { join } from 'node:path';
import type { ArticleViolation } from '../types.js';
import { REPO_ROOT } from '../types.js';
/**
 * Resolve the maximum allowed words-per-anchor for an article type.
 *
 * Reads `analysis/methodologies/reference-quality-thresholds.json` under
 * `REPO_ROOT` and looks up `aiFirst.citationDensity.perArticle[subfolderName]`.
 * Only a numeric entry overrides the default; a missing file, missing key,
 * non-numeric entry, or any read/parse failure yields the default of 200.
 */
function resolveCitationDensityThreshold(subfolderName: string): number {
  const fallback = 200;
  if (!subfolderName) return fallback;
  try {
    const thresholdsPath = join(
      REPO_ROOT,
      'analysis',
      'methodologies',
      'reference-quality-thresholds.json',
    );
    if (!existsSync(thresholdsPath)) return fallback;
    const thresholds = JSON.parse(readFileSync(thresholdsPath, 'utf8')) as {
      aiFirst?: { citationDensity?: { perArticle?: Record<string, number | string> } };
    };
    const perArticle = thresholds.aiFirst?.citationDensity?.perArticle;
    const override = perArticle ? perArticle[subfolderName] : undefined;
    return typeof override === 'number' ? override : fallback;
  } catch {
    // Deliberate best-effort: an unreadable or malformed thresholds file
    // must not break validation, so fall back to the default threshold.
    return fallback;
  }
}

/**
 * Low-citation-density rule. Uses `reference-quality-thresholds.json`
 * per-article overrides.
 *
 * @param rel - Article path recorded in the `file` field of any violation.
 * @param text - Article text to analyse.
 * @param subfolderName - Article type key used to look up a per-article
 *   threshold; when empty, the default threshold of 200 is used.
 * @returns An empty array when the article passes (or has neither words nor
 *   anchors); otherwise a single `low-citation-density` violation, raised
 *   either because there are words but zero anchors or because the
 *   words-per-anchor ratio exceeds the threshold.
 */
export function checkCitationDensity(
  rel: string,
  text: string,
  subfolderName: string,
): ArticleViolation[] {
  const wordCount = countWords(text);
  const anchors = countArticleEvidenceAnchors(text);

  if (anchors === 0) {
    // No anchors: nothing to divide by. Content without any anchor is a
    // violation; an article with no words at all is not.
    if (wordCount > 0) {
      return [
        {
          file: rel,
          code: 'low-citation-density',
          message: `Article has ${wordCount} words but zero verifiable evidence anchors. Add dok_id references, vote IDs, or primary-source URLs.`,
        },
      ];
    }
    return [];
  }

  const density = wordCount / anchors;
  const threshold = resolveCitationDensityThreshold(subfolderName);
  if (density > threshold) {
    return [
      {
        file: rel,
        code: 'low-citation-density',
        message: `Citation density is ${Math.round(density)} words/anchor — maximum allowed is ${threshold} (for article type "${subfolderName}"). Add more evidence anchors (dok_id, vote IDs, primary-source URLs) to meet the editorial floor.`,
      },
    ];
  }
  return [];
}
|