/**
* @module analysis-framework/cross-reference
* @description Cross-document relationship detection for parliamentary document batches.
*
* Detects the following relationship types between documents:
* - `responds-to` — Motion responding to a government proposition
* - `amends` — Amendment document modifying an existing act
* - `implements` — Implementation measure for an EU directive or treaty commitment
* - `contradicts` — Conflicting policy positions between two documents
* - `related-topic` — Thematically related documents sharing policy domains
*
* @author Hack23 AB
* @license Apache-2.0
*/
import type { RawDocument } from '../data-transformers/types.js';
import type { DocumentLink } from './types.js';
import { detectPolicyDomains } from '../data-transformers/policy-analysis.js';
import { extractPropRef } from '../data-transformers/document-analysis.js';
// ---------------------------------------------------------------------------
// Constants
// ---------------------------------------------------------------------------
/** Smallest number of shared policy domains that qualifies two documents as `related-topic`. */
const RELATED_TOPIC_DOMAIN_OVERLAP = 2;

/** Phrases (Swedish and English) that mark a document as an EU implementation measure. */
const EU_IMPL_KEYWORDS: readonly string[] = [
  'implementering',
  'implementation',
  'genomförande',
  'transponering',
  'transposition',
  'genomföra',
  'implementera',
  'med anledning av eu',
  'eu-direktiv',
  'eu directive',
];

/** Phrases (Swedish and English) that mark a document as amending something else. */
const AMEND_KEYWORDS: readonly string[] = [
  'ändring',
  'amendment',
  'ändringslag',
  'ändringsförordning',
  'tillägg',
  'addition',
  'komplettering',
  'supplement',
  'uppdatering',
  'update',
  'revision',
];

/**
 * Pairs of opposing policy terms. Two documents are considered to conflict
 * when one mentions only the first term of a pair and the other mentions
 * only the second.
 */
const CONFLICT_PAIRS: ReadonlyArray<readonly [string, string]> = [
  ['skattehöjning', 'skattesänkning'],
  ['tax increase', 'tax cut'],
  ['privatisering', 'förstatligande'],
  ['privatization', 'nationalization'],
  ['avreglering', 'reglering'],
  ['deregulation', 'regulation'],
  ['immigration increase', 'immigration decrease'],
  ['fler i arbete', 'bidragsbegränsning'],
];
// ---------------------------------------------------------------------------
// Helper utilities
// ---------------------------------------------------------------------------
/**
 * Pick a stable identifier for a document.
 *
 * Tries `dok_id`, `url`, `titel` and `title` in that order and returns the
 * first truthy one; empty strings are skipped. Falls back to the literal
 * `'unknown'` when none is usable.
 *
 * @param doc - Document to identify
 * @returns The chosen identifier string
 */
export function docId(doc: RawDocument): string {
  const candidates = [doc.dok_id, doc.url, doc.titel, doc.title];
  for (const candidate of candidates) {
    if (candidate) {
      return candidate;
    }
  }
  return 'unknown';
}
/**
 * Build the lower-cased search text for a document by joining its `titel`,
 * `title`, `summary` and `notis` fields with single spaces. Falsy fields
 * (missing or empty) are left out.
 */
function docText(doc: RawDocument): string {
  const fields = [doc.titel, doc.title, doc.summary, doc.notis];
  const present = fields.filter(value => Boolean(value));
  const combined = present.join(' ');
  return combined.toLowerCase();
}
/**
 * Report whether `text` contains at least one of `keywords` as a substring.
 *
 * Each keyword is lower-cased before the comparison; `text` is used as given,
 * so callers pass text that is already lower-cased.
 */
function containsAny(text: string, keywords: readonly string[]): boolean {
  for (const keyword of keywords) {
    if (text.includes(keyword.toLowerCase())) {
      return true;
    }
  }
  return false;
}
// ---------------------------------------------------------------------------
// Individual link detectors
// ---------------------------------------------------------------------------
/**
 * Detect "responds-to" links: motions whose title references a proposition
 * that is present in the same batch.
 *
 * A document counts as a proposition or motion when either `doktyp` or
 * `documentType` equals `'prop'` / `'mot'`. The reference is extracted from
 * the motion title with `extractPropRef`; motions for which it returns a
 * falsy value are skipped.
 *
 * @param docs - Batch of documents to inspect
 * @returns One link per motion that matches a proposition in the batch
 */
function detectRespondsTo(docs: RawDocument[]): DocumentLink[] {
  const links: DocumentLink[] = [];
  const propositions = docs.filter(d => d.doktyp === 'prop' || d.documentType === 'prop');
  const motions = docs.filter(d => d.doktyp === 'mot' || d.documentType === 'mot');

  for (const motion of motions) {
    const title = motion.titel || motion.title || '';
    const propRef = extractPropRef(title);
    if (!propRef) continue;

    // Match against propositions in the batch by dok_id or title substring;
    // only the first matching proposition is linked.
    const matched = propositions.find(p =>
      (p.dok_id && p.dok_id.includes(propRef)) ||
      (p.titel || p.title || '').includes(propRef)
    );

    if (matched) {
      links.push({
        sourceId: docId(motion),
        targetId: docId(matched),
        type: 'responds-to',
        reason: `Motion references proposition ${propRef}`,
        confidence: 95,
      });
    }
  }
  return links;
}
/**
 * Detect "implements" links: documents implementing EU directives or treaty commitments.
 *
 * Heuristic: a document whose text contains an implementation keyword is
 * linked to every other document whose text mentions a directive, regulation
 * or treaty ('eu-direktiv', 'eu directive', 'förordning', 'fördrag'),
 * provided the two share at least one policy domain.
 *
 * @param docs - Batch of documents to inspect
 * @param domainCache - Map from document id to its policy domains; documents
 *                      missing from the map are treated as having no domains
 * @returns Links from implementing documents to directive/treaty documents
 */
function detectImplements(docs: RawDocument[], domainCache: Map<string, string[]>): DocumentLink[] {
  const links: DocumentLink[] = [];
  const implementers = docs.filter(d => containsAny(docText(d), EU_IMPL_KEYWORDS));
  const directives = docs.filter(d => containsAny(docText(d), ['eu-direktiv', 'eu directive', 'förordning', 'fördrag']));

  for (const impl of implementers) {
    // The implementer's id and domains do not change across the inner loop.
    const implId = docId(impl);
    const implDomains = domainCache.get(implId) ?? [];

    for (const dir of directives) {
      const dirId = docId(dir);
      // A document can be in both lists; never link it to itself.
      if (implId === dirId) continue;

      const dirDomains = domainCache.get(dirId) ?? [];
      const overlap = implDomains.filter(d => dirDomains.includes(d));
      if (overlap.length >= 1) {
        links.push({
          sourceId: implId,
          targetId: dirId,
          type: 'implements',
          reason: `Implementation document shares domain(s) ${overlap.join(', ')} with directive/treaty document`,
          confidence: 60,
        });
      }
    }
  }
  return links;
}
/**
 * Detect "amends" links: documents explicitly amending earlier legislation.
 *
 * A document is treated as an amender when its text contains one of
 * `AMEND_KEYWORDS`. It is linked to every other document whose lower-cased
 * title is longer than five characters and whose first 20 characters (or the
 * whole title, if shorter) appear inside the amender's lower-cased title.
 *
 * @param docs - Batch of documents to inspect
 * @returns Links from amending documents to the documents they appear to amend
 */
function detectAmends(docs: RawDocument[]): DocumentLink[] {
  const result: DocumentLink[] = [];

  for (const amender of docs) {
    if (!containsAny(docText(amender), AMEND_KEYWORDS)) continue;

    const sourceId = docId(amender);
    const sourceTitle = (amender.titel || amender.title || '').toLowerCase();

    for (const candidate of docs) {
      const targetId = docId(candidate);
      if (targetId === sourceId) continue;

      const candidateTitle = (candidate.titel || candidate.title || '').toLowerCase();
      // Very short titles would match almost anything, so they are ignored.
      if (candidateTitle.length <= 5) continue;

      const titlePrefix = candidateTitle.slice(0, 20);
      if (!sourceTitle.includes(titlePrefix)) continue;

      result.push({
        sourceId,
        targetId,
        type: 'amends',
        reason: 'Amending document title overlaps with target document title',
        confidence: 55,
      });
    }
  }

  return result;
}
/**
 * Decide whether `text` mentions `term`.
 *
 * When a pre-compiled `regex` is supplied it alone decides the outcome; this
 * is used for terms that are a substring of their conflict partner (e.g.
 * "reglering" inside "avreglering"), so the compound word does not count as a
 * hit for the simpler term. Without a regex a plain substring test is used.
 *
 * @param text - Text to search
 * @param term - Term to look for when no regex is given
 * @param regex - Optional pattern that replaces the substring test
 */
function matchesTerm(text: string, term: string, regex: RegExp | null): boolean {
  return regex ? regex.test(text) : text.includes(term);
}
/**
 * Escape every regular-expression metacharacter in `s` with a backslash so
 * the result can be embedded in a `RegExp` source and match `s` literally.
 */
function escapeForRegex(s: string): string {
  const metaCharacters = /[.*+?^${}()|[\]\\]/g;
  return s.replace(metaCharacters, matched => '\\' + matched);
}
/**
 * For each entry of `CONFLICT_PAIRS`, two flags in the same order as the
 * pair's terms: a flag is `true` when that term occurs inside its partner
 * and therefore cannot be detected with a plain substring test.
 */
const BOUNDARY_FLAGS: ReadonlyArray<readonly [boolean, boolean]> = CONFLICT_PAIRS.map(
  (pair): readonly [boolean, boolean] => {
    const [first, second] = pair;
    const firstInsideSecond = second.includes(first);
    const secondInsideFirst = first.includes(second);
    return [firstInsideSecond, secondInsideFirst];
  }
);
/**
 * Pre-compiled patterns, parallel to `CONFLICT_PAIRS`, for the terms flagged
 * in `BOUNDARY_FLAGS`. Each pattern is the escaped term behind a negative
 * lookbehind, so the term does not match when a letter (a-z, å, ä, ö) comes
 * directly before it. `null` means a simple `includes()` check is enough.
 */
const TERM_REGEXES: ReadonlyArray<readonly [RegExp | null, RegExp | null]> = CONFLICT_PAIRS.map(
  (pair, pairIndex): readonly [RegExp | null, RegExp | null] => {
    const flags = BOUNDARY_FLAGS[pairIndex];
    const compile = (term: string, needsBoundary: boolean): RegExp | null =>
      needsBoundary ? new RegExp(`(?<![a-zåäö])${escapeForRegex(term)}`, 'i') : null;
    const firstPattern = compile(pair[0], flags[0]);
    const secondPattern = compile(pair[1], flags[1]);
    return [firstPattern, secondPattern];
  }
);
/**
 * Detect "contradicts" links: documents with opposing policy positions.
 *
 * Every unordered pair of documents is checked against `CONFLICT_PAIRS`. A
 * pair is linked when one document mentions exactly one term of a conflict
 * pair and the other document mentions exactly the opposite term. A document
 * that mentions both terms never triggers a link for that conflict pair. At
 * most one link is produced per document pair.
 *
 * @param docs - Batch of documents to inspect
 * @returns Links whose source is the earlier document in `docs`
 */
function detectContradicts(docs: RawDocument[]): DocumentLink[] {
  const links: DocumentLink[] = [];
  // Build each document's search text once instead of once per pair.
  const texts = docs.map(doc => docText(doc));

  for (let i = 0; i < docs.length; i++) {
    for (let j = i + 1; j < docs.length; j++) {
      const docA = docs[i];
      const docB = docs[j];
      const textA = texts[i];
      const textB = texts[j];

      for (let p = 0; p < CONFLICT_PAIRS.length; p++) {
        const [termA, termB] = CONFLICT_PAIRS[p];
        const [regexA, regexB] = TERM_REGEXES[p];
        const aHasA = matchesTerm(textA, termA.toLowerCase(), regexA);
        const aHasB = matchesTerm(textA, termB.toLowerCase(), regexB);
        const bHasA = matchesTerm(textB, termA.toLowerCase(), regexA);
        const bHasB = matchesTerm(textB, termB.toLowerCase(), regexB);

        // True contradiction: one doc has term A, the other has term B (not both)
        if ((aHasA && !aHasB) && (bHasB && !bHasA)) {
          links.push({
            sourceId: docId(docA),
            targetId: docId(docB),
            type: 'contradicts',
            reason: `Conflicting policy positions: "${termA}" vs "${termB}"`,
            confidence: 65,
          });
          break; // One contradiction per pair is enough
        }

        // Mirror case: the documents hold the two positions the other way round.
        if ((aHasB && !aHasA) && (bHasA && !bHasB)) {
          links.push({
            sourceId: docId(docA),
            targetId: docId(docB),
            type: 'contradicts',
            reason: `Conflicting policy positions: "${termB}" vs "${termA}"`,
            confidence: 65,
          });
          break;
        }
      }
    }
  }
  return links;
}
/**
 * Detect "related-topic" links: documents sharing multiple policy domains.
 *
 * Every unordered pair of documents is compared. A link is produced when the
 * number of domains of the first document that also belong to the second
 * reaches `RELATED_TOPIC_DOMAIN_OVERLAP`. Confidence is 40 plus 15 per shared
 * domain, capped at 90.
 *
 * @param docs - Batch of documents to inspect
 * @param domainCache - Map from document id to its policy domains; documents
 *                      missing from the map are treated as having no domains
 * @returns Links whose source is the earlier document in `docs`
 */
function detectRelatedTopics(docs: RawDocument[], domainCache: Map<string, string[]>): DocumentLink[] {
  const found: DocumentLink[] = [];
  const total = docs.length;

  for (let first = 0; first < total; first++) {
    for (let second = first + 1; second < total; second++) {
      const firstId = docId(docs[first]);
      const secondId = docId(docs[second]);
      const firstDomains = domainCache.get(firstId) ?? [];
      const secondDomains = domainCache.get(secondId) ?? [];
      const shared = firstDomains.filter(domain => secondDomains.includes(domain));

      if (shared.length < RELATED_TOPIC_DOMAIN_OVERLAP) {
        continue;
      }

      const confidence = Math.min(90, 40 + shared.length * 15);
      found.push({
        sourceId: firstId,
        targetId: secondId,
        type: 'related-topic',
        reason: `Shares ${shared.length} policy domain(s): ${shared.join(', ')}`,
        confidence,
      });
    }
  }

  return found;
}
// ---------------------------------------------------------------------------
// Deduplication
// ---------------------------------------------------------------------------
/**
 * Remove duplicate links (same source, target, and type), keeping the
 * entry with the highest confidence.
 *
 * On a confidence tie the earlier entry wins. The result keeps the order in
 * which each (source, target, type) combination first appeared.
 *
 * @param links - Links that may contain duplicates
 * @returns A new array with one link per (source, target, type) combination
 */
function deduplicateLinks(links: DocumentLink[]): DocumentLink[] {
  const seen = new Map<string, DocumentLink>();
  for (const link of links) {
    const key = `${link.sourceId}||${link.targetId}||${link.type}`;
    const existing = seen.get(key);
    // Replace only on strictly higher confidence so ties keep the first entry.
    if (!existing || link.confidence > existing.confidence) {
      seen.set(key, link);
    }
  }
  return Array.from(seen.values());
}
// ---------------------------------------------------------------------------
// Public API
// ---------------------------------------------------------------------------
/**
 * Find relationships between the documents of one parliamentary batch.
 *
 * Five detectors run in a fixed order and their results are merged:
 * - `responds-to` — Motion cites a government proposition
 * - `implements` — Implementation document for an EU directive or treaty
 * - `amends` — Document explicitly amending earlier legislation
 * - `contradicts` — Documents with opposing policy positions
 * - `related-topic` — Thematically related documents (≥2 shared domains)
 *
 * Batches with fewer than two documents yield an empty array.
 *
 * @param docs - Batch of documents to cross-reference
 * @param precomputedMap - Optional pre-computed domain map (docId → domains).
 *                         When given it is used as-is and `detectPolicyDomains`
 *                         is not called; otherwise domains are computed for
 *                         every document in `docs`.
 * @returns Deduplicated array of detected `DocumentLink` objects
 */
export function detectCrossDocumentLinks(
  docs: RawDocument[],
  precomputedMap?: Map<string, string[]>,
): DocumentLink[] {
  // A relationship needs at least two documents.
  if (docs.length < 2) {
    return [];
  }

  let domainCache: Map<string, string[]>;
  if (precomputedMap) {
    domainCache = precomputedMap;
  } else {
    domainCache = new Map<string, string[]>();
    for (const doc of docs) {
      domainCache.set(docId(doc), detectPolicyDomains(doc, 'en'));
    }
  }

  const respondsTo = detectRespondsTo(docs);
  const implementsLinks = detectImplements(docs, domainCache);
  const amends = detectAmends(docs);
  const contradicts = detectContradicts(docs);
  const relatedTopics = detectRelatedTopics(docs, domainCache);

  return deduplicateLinks(
    respondsTo.concat(implementsLinks, amends, contradicts, relatedTopics),
  );
}
|