All files / scripts party-variants.js

100% Statements 12/12
100% Branches 4/4
100% Functions 1/1
100% Lines 12/12

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159                                                                                                                                                                                                                                  2x                                 64x   64x 3x     61x 488x   751x             751x     751x 244x 244x         61x    
/**
 * @module Intelligence/PartyAnalysis
 * @category Intelligence Operations / Supporting Infrastructure
 * @name Party Variants - Political Party Name Normalization Framework
 * 
 * @description
 * Essential party name normalization utility providing canonical mappings of Swedish
 * political party names to their abbreviated codes. Prevents double-counting and
 * analytical bias when multiple party name variants appear in the same content.
 * Critical infrastructure for accurate party perspective analysis and coalition dynamics tracking.
 * 
 * Political Party Context (Swedish Riksdag - 8 parties):
 * - Socialdemokraterna (S): Left-wing social democratic party
 * - Moderaterna (M): Right-wing conservative/liberal party
 * - Sverigedemokraterna (SD): Right-wing populist/nationalist party
 * - Vänsterpartiet (V): Far-left communist party
 * - Miljöpartiet (MP): Green party (left-wing)
 * - Centerpartiet (C): Centrist/rural representation party
 * - Liberalerna (L): Classical liberal party
 * - Kristdemokraterna (KD): Christian democratic party
 * 
 * Party Naming Challenges:
 * Articles often reference parties by multiple name variants (full Swedish name,
 * English translation, formal abbreviation). Example: "Moderaterna" = "Moderate Party" = "M".
 * Without normalization, party mention counting becomes inaccurate, leading to:
 * - Overstated party perspective counts (same party counted multiple times)
 * - Analytical bias (some parties may use translated names more frequently)
 * - Quality metrics misalignment (perspective diversity falsely inflated)
 * 
 * Normalization Strategy:
 * - Maps all party name variants to single canonical party code (S, M, SD, V, MP, C, L, KD)
 * - Prevents double-counting when both full names and abbreviations appear
 * - Ensures consistent party perspective analysis across content
 * - Enables reliable coalition analysis based on party groupings
 * 
 * Core Data Structure:
 * PARTY_VARIANTS object maps canonical codes to arrays of name variants:
 * {
 *   'S': ['Socialdemokraterna', 'S'],
 *   'M': ['Moderaterna', 'M'],
 *   'SD': ['Sverigedemokraterna', 'SD'],
 *   // ... etc for all 8 parties
 * }
 * 
 * Text Matching Implementation:
 * extractPartyMentions() function implements robust text search:
 * - Uses Unicode-aware regex boundaries (\p{L}\p{N}) for proper word detection
 * - Handles non-ASCII characters: ä, ö, å in Swedish names
 * - Prevents false matches: searching "M" doesn't match "Moderaterna"'s M
 * - Searches HTML content directly (works with generated articles)
 * - Returns Set<string> of canonical party codes found
 * 
 * Integration Usage Across Codebase:
 * - article-quality-enhancer.js: Counts unique parties for perspective diversity metric
 * - validate-evening-analysis.js: Extracts party mentions for analytical scoring
 * - news-evening-analysis.test.js: Tests party mention extraction accuracy
 * - Intelligence dashboards: Party affiliation tracking and coalition analysis
 * 
 * Intelligence Applications:
 * - Coalition dynamics analysis: Track party alliances and opposition blocs
 * - Perspective diversity measurement: Ensure balanced party coverage
 * - Political polarization tracking: Identify party positioning shifts
 * - Media bias detection: Identify systematic under/over-coverage of parties
 * - Electoral analysis: Monitor party popularity and messaging
 * 
 * Unicode Handling for Swedish Characters:
 * Pattern: (?:^|[^\p{L}\p{N}])VARIANT(?=$|[^\p{L}\p{N}])
 * - \p{L}: Unicode letter (handles ä, ö, å, and all other scripts)
 * - \p{N}: Unicode number
 * - Prevents matching party codes inside word boundaries
 * - Works across all languages despite Swedish character specifics
 * 
 * Extending Variants for Other Languages:
 * If future expansion includes English translations or other languages, append
 * the additional names to the existing variant arrays, for example:
 * - S: ['Socialdemokraterna', 'S', 'Social Democrats'] (if needed)
 * - M: ['Moderaterna', 'M', 'Moderate Party']
 * - Extension maintains backward compatibility
 * 
 * Data Protection:
 * - No personal identifiers stored or processed
 * - Operates on published party entity references only
 * - Complies with GDPR by not identifying individual politicians
 * - Audit trail of political party mentions in articles
 * 
 * ISMS Compliance:
 * - ISO 27001:2022 A.12.2.1 (change log maintenance - version control)
 * - NIST CSF 2.0 PR.DS-1 (data classification - public content)
 * 
 * Functions:
 * - extractPartyMentions(html): Searches HTML content for party references
 *   Input: HTML string from article
 *   Output: Set<string> of canonical party codes (e.g., new Set(['S', 'M', 'SD']))
 * 
 * Usage Example:
 *   import { extractPartyMentions } from './party-variants.js';
 *   const parties = extractPartyMentions(articleHtml);
 *   const uniquePartyCount = parties.size;  // 0-8
 *   const hasBalancedCoverage = uniquePartyCount >= 4;  // Min 4 parties threshold
 * 
 * @intelligence Core utility for accurate party perspective analysis
 * @osint Analyzes public political party references from news coverage
 * @risk Inaccurate normalization leads to biased party perspective counts
 * @gdpr No personal data processing (political entity references only)
 * @security Case-sensitive matching for proper abbreviation detection
 * 
 * @author Hack23 AB (Political Intelligence Team)
 * @license Apache-2.0
 * @version 1.8.0
 * @see article-quality-enhancer.js (primary consumer)
 * @see validate-evening-analysis.js (party mention validation)
 * @see Swedish political party system structure
 */
 
/**
 * Canonical party code -> name variants that count as a mention of that party.
 *
 * Each entry lists the full Swedish party name followed by the abbreviation
 * itself. The keys are the canonical codes that extractPartyMentions() reports.
 *
 * @type {Object<string, string[]>}
 */
export const PARTY_VARIANTS = {
  S: [
    'Socialdemokraterna',
    'S',
  ],
  M: [
    'Moderaterna',
    'M',
  ],
  SD: [
    'Sverigedemokraterna',
    'SD',
  ],
  V: [
    'Vänsterpartiet',
    'V',
  ],
  MP: [
    'Miljöpartiet',
    'MP',
  ],
  C: [
    'Centerpartiet',
    'C',
  ],
  L: [
    'Liberalerna',
    'L',
  ],
  KD: [
    'Kristdemokraterna',
    'KD',
  ],
};
 
/**
 * Extract unique party mentions from HTML content.
 *
 * Every variant in PARTY_VARIANTS is searched for as a standalone token: it must
 * be preceded by the start of the input or a character that is neither a Unicode
 * letter nor a Unicode number, and followed by the end of the input or such a
 * character. The raw HTML string is searched as-is (markup is not stripped).
 *
 * Case handling depends on the kind of variant:
 * - Abbreviations (variants written entirely in upper case, e.g. "S", "MP") are
 *   matched case-sensitively. Otherwise lowercase tokens such as the <s> tag or
 *   the letters in "d.v.s." / "m.m." would be counted as party mentions.
 * - Full names (e.g. "Moderaterna") are matched case-insensitively, so
 *   "moderaterna" in running text still counts.
 *
 * @param {string} html - HTML content to search
 * @returns {Set<string>} - Set of canonical party codes found (empty for falsy input)
 */
export function extractPartyMentions(html) {
  const parties = new Set();

  if (!html) {
    return parties;
  }

  for (const [canonicalCode, variants] of Object.entries(PARTY_VARIANTS)) {
    for (const variant of variants) {
      // Escape special regex characters in variant
      const escapedVariant = variant.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

      // An all-uppercase variant is an abbreviation; only those are
      // case-sensitive. Full names keep the 'i' flag.
      const isAbbreviation = variant === variant.toUpperCase();
      const flags = isAbbreviation ? 'u' : 'ui';

      // Use a Unicode-aware non-letter/non-number boundary for ALL variants.
      // This handles HTML tags (>), parentheses, punctuation, whitespace etc.
      // \b doesn't work well with non-ASCII (ä, ö, å) so we use [^\p{L}\p{N}].
      // For short codes this prevents matching inside words like "Sörling",
      // "USA", or "MP" when looking for "M".
      const pattern = new RegExp(
        `(?:^|[^\\p{L}\\p{N}])${escapedVariant}(?=$|[^\\p{L}\\p{N}])`,
        flags
      );

      if (pattern.test(html)) {
        parties.add(canonicalCode);
        // One matching variant is enough; skip the party's remaining variants.
        break;
      }
    }
  }

  return parties;
}