Source: scripts/article-quality-enhancer.js

#!/usr/bin/env node

/**
 * @module Intelligence/ContentQuality
 * @category Intelligence Operations / Supporting Infrastructure
 * @name Article Quality Enhancer - Economist-Style Content Validation Framework
 * 
 * @description
 * Comprehensive content quality assurance system implementing The Economist-style editorial standards
 * for political journalism. This utility validates article structure, analytical depth, source attribution,
 * and perspective diversity to ensure intelligence reporting meets rigorous transparency requirements.
 * 
 * Core Functionality:
 * - Validates analytical depth scoring (min 0.6 threshold) assessing complexity of political analysis
 * - Enforces source quality standards requiring minimum 3 cross-referenced government documents/debates
 * - Counts unique party perspectives (min 4 parties) ensuring balanced coverage across Swedish coalition
 * - Detects "Why This Matters" contextual sections explaining political significance for readers
 * - Validates historical context references connecting current events to parliamentary timeline
 * - Calculates composite quality score (0.0-1.0 scale, min 0.75 threshold) for publication readiness
 * 
 * Intelligence Operations Integration:
 * - Supports ongoing parliamentary monitoring by maintaining content quality standards
 * - Enables identification of systematic bias in coverage across party spectrum
 * - Tracks journalistic methodology compliance with editorial guidelines
 * - Documents source diversity for audit trails and GDPR data provenance
 * 
 * Content Quality Standards:
 * - Minimum analytical depth: 0.6 (evaluates substantive policy discussion)
 * - Minimum cross-references: 3 (government documents, committee reports, voting records)
 * - Minimum party perspectives: 4 (ensures broad political spectrum coverage)
 * - Requires "Why This Matters" section for context and reader understanding
 * - Recommends historical context linking current developments to past decisions
 * - Minimum publication quality score: 0.75 (composite metric of all dimensions)
 * 
 * Integration Points:
 * - Consumed by news generation pipeline for automated quality gates
 * - Referenced in editorial workflows for manual content enhancement
 * - Used by CI/CD validation scripts (validate-news-translations.js, validate-articles-playwright.js)
 * - Feeds metrics into intelligence dashboard for quality trending
 * 
 * Data Handling:
 * - Processes publicly available parliamentary records and published journalism
 * - No processing of personal data (operates on aggregated article metrics)
 * - Complies with ISO 27001:2022 A.14.2.1 (supply chain controls on content quality)
 * - Supports GDPR Article 5 (transparency: all quality metrics documented)
 * 
 * Usage:
 *   import { enhanceArticleQuality } from './article-quality-enhancer.js';
 *   const result = await enhanceArticleQuality(articlePath, options);
 *   // Returns: { passed, qualityScore, metrics, issues, warnings, thresholds, articlePath }
 * 
 * @intelligence Core utility for content validation in political journalism platform
 * @osint Validates open-source political documentation (government records, voting history)
 * @risk Content quality failure may result in publication of unsubstantiated claims
 * @gdpr No personal data processing; operates on published content only
 * @security HTML/JavaScript injection prevented through article parsing
 * 
 * @author Hack23 AB (Content Intelligence Team)
 * @license Apache-2.0
 * @version 2.0.0
 * @see Issue #150 (News Realtime Monitor Enhancement)
 * @see The Economist Editorial Standards
 * @see GDPR Article 6(1)(e) - Public Interest Processing for political transparency
 * @see ISO 27001:2022 A.14.2.1 - Supply chain information security
 */

import fs from 'fs';

/**
 * Default quality thresholds based on The Economist standards.
 *
 * enhanceArticleQuality spreads caller-supplied overrides on top of this
 * object, so any key can be replaced per call. The `min*` and `require*`
 * entries produce blocking issues; the `recommend*` entries only produce
 * non-blocking warnings.
 */
const DEFAULT_THRESHOLDS = {
  minQualityScore: 0.75,            // composite score below this is a blocking issue
  minAnalyticalDepth: 0.6,          // analytical-depth score below this is a blocking issue
  minPartySources: 4,               // fewer distinct parties than this is a blocking issue
  minCrossReferences: 3,            // fewer unique document IDs than this is a blocking issue
  requireWhyThisMatters: true,      // when true, a missing section is a blocking issue
  recommendHistoricalContext: true, // when true, missing context adds a warning only
  recommendInternationalComparison: false // when true, a missing comparison adds a warning only
};

/**
 * Map of normalized party codes to their common name variants.
 *
 * Each key is a party abbreviation; each value lists the Swedish name, an
 * English name and, last, the abbreviation itself (identical to the key).
 * countPartyPerspectives records the key, not the variant, so a party that
 * appears under several of its names is still counted once.
 */
const PARTY_VARIANTS = {
  S: ['Socialdemokraterna', 'Social Democrats', 'S'],
  M: ['Moderaterna', 'Moderate', 'M'],
  SD: ['Sverigedemokraterna', 'Sweden Democrats', 'SD'],
  C: ['Centerpartiet', 'Centre Party', 'C'],
  V: ['Vänsterpartiet', 'Left Party', 'V'],
  KD: ['Kristdemokraterna', 'Christian Democrats', 'KD'],
  L: ['Liberalerna', 'Liberals', 'L'],
  MP: ['Miljöpartiet', 'Green Party', 'MP']
};

/**
 * Riksdag/Regering document ID patterns.
 *
 * Every pattern carries the `g` flag so that String.prototype.match returns
 * all occurrences in an article. Because global regexes keep state in
 * `lastIndex`, these shared instances should not be used with `.test()` or
 * `.exec()`. The first pattern is case-sensitive; the prefixed forms
 * (Prop., Bet., Mot., IP, Fr.) are matched case-insensitively.
 */
const DOCUMENT_ID_PATTERNS = [
  /\b[A-Z]{1,3}\d{1,4}\/\d{2}:\d+\b/g,  // Committee reports: AU10/24:1
  /\bProp\.\s*\d{4}\/\d{2}:\d+\b/gi,     // Propositions: Prop. 2024/25:1
  /\bBet\.\s*\d{4}\/\d{2}:[A-Z]{1,3}\d+\b/gi,  // Committee reports: Bet. 2024/25:FiU10
  /\bMot\.\s*\d{4}\/\d{2}:\d+\b/gi,      // Motions: Mot. 2024/25:123
  /\bIP\s*\d{4}\/\d{2}:\d+\b/gi,         // Interpellations: IP 2024/25:45
  /\bFr\.\s*\d{4}\/\d{2}:\d+\b/gi        // Questions: Fr. 2024/25:67
];

/**
 * Assess analytical depth of article content.
 *
 * The score is the sum of five components, each capped at 0.2:
 * - Causal reasoning ("because", "therefore", "as a result", ...)
 * - Comparative analysis ("compared to", "in contrast", "while", ...)
 * - Trend analysis ("trend", "pattern", "shift", ...)
 * - Evidence-based claims ("according to", "study", "report", ...)
 * - Quotations (blockquote elements plus inline quoted passages)
 *
 * For the four keyword groups each distinct phrase that occurs at least once
 * in the lower-cased plain text is worth 0.04; repeated occurrences of the
 * same phrase do not add more. Phrases are matched as plain substrings.
 *
 * @param {string} content - HTML content of article
 * @returns {number} Score 0.0-1.0
 */
function assessAnalyticalDepth(content) {
  const POINTS_PER_SIGNAL = 0.04;
  const MAX_PER_CATEGORY = 0.2;

  const text = stripHtml(content).toLowerCase();

  const indicatorGroups = [
    // Causal reasoning
    ['because', 'therefore', 'as a result', 'consequently', 'due to', 'leads to', 'caused by'],
    // Comparative analysis
    ['compared to', 'in contrast', 'while', 'whereas', 'on the other hand', 'however'],
    // Trend/pattern analysis
    ['trend', 'pattern', 'shift', 'change', 'evolution', 'development'],
    // Evidence-based claims
    ['data shows', 'according to', 'study', 'report', 'statistics', 'evidence']
  ];

  let score = 0.0;
  for (const phrases of indicatorGroups) {
    const distinctHits = phrases.filter((phrase) => text.includes(phrase)).length;
    score += Math.min(distinctHits * POINTS_PER_SIGNAL, MAX_PER_CATEGORY);
  }

  // Opening blockquote tags are counted on the raw HTML. The pattern accepts
  // attributes, so <blockquote class="..."> counts the same as <blockquote>.
  const blockquoteCount = (content.match(/<blockquote\b[^>]*>/gi) || []).length;

  // Inline quotations: a straight double quote directly followed by a word
  // character, each occurrence weighted one half.
  // NOTE(review): this only sees opening straight quotes, so a quoted passage
  // contributes 0.5 and typographic quotes are ignored - confirm intent.
  const inlineQuoteCount = (text.match(/"\w/g) || []).length / 2;

  const quoteCount = blockquoteCount + inlineQuoteCount;
  score += Math.min(quoteCount * POINTS_PER_SIGNAL, MAX_PER_CATEGORY);

  return Math.min(score, 1.0);
}

/**
 * Count unique party perspectives mentioned in article.
 *
 * Every party in PARTY_VARIANTS is counted at most once, no matter how many
 * of its names or abbreviations appear. Matching runs on the plain text
 * (markup removed via stripHtml) so tag names and attribute values are never
 * mistaken for party codes.
 *
 * Full names are matched as whole words, case-insensitively. Abbreviations
 * (the variant equal to the party code, e.g. "S", "M", "L") are matched as
 * whole words case-sensitively; otherwise ordinary lowercase letters such as
 * the "s" in "government's" would register as a party mention.
 *
 * @param {string} content - HTML content of article
 * @returns {number} Number of unique parties mentioned
 */
function countPartyPerspectives(content) {
  const text = stripHtml(content == null ? '' : String(content));
  const partiesFound = new Set();

  for (const [code, variants] of Object.entries(PARTY_VARIANTS)) {
    const isMentioned = variants.some((variant) => {
      // Escape regex metacharacters so a variant is always matched literally.
      const escaped = variant.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
      // Only the abbreviation keeps its exact casing; names may be any case.
      const flags = variant === code ? '' : 'i';
      // A fresh, non-global RegExp per check carries no lastIndex state.
      return new RegExp(`\\b${escaped}\\b`, flags).test(text);
    });

    if (isMentioned) {
      partiesFound.add(code);
    }
  }

  return partiesFound.size;
}

/**
 * Count cross-referenced Riksdag/Regering documents.
 *
 * Every match of every pattern in DOCUMENT_ID_PATTERNS is collected from the
 * raw content. Before de-duplication each match is normalized by removing
 * all whitespace and upper-casing it, so spelling variants of one reference
 * ("Prop. 2024/25:1", "prop. 2024/25:1", "PROP.2024/25:1") count as a single
 * document instead of three.
 *
 * @param {string} content - HTML content of article
 * @returns {number} Number of unique document IDs found (0 for null/undefined content)
 */
function countCrossReferences(content) {
  const source = content == null ? '' : String(content);
  const documentIds = new Set();

  for (const pattern of DOCUMENT_ID_PATTERNS) {
    // String.prototype.match starts a global regex from index 0 on each call,
    // so sharing the module-level pattern objects is safe here.
    const matches = source.match(pattern) || [];
    for (const match of matches) {
      documentIds.add(match.replace(/\s+/g, '').toUpperCase());
    }
  }

  return documentIds.size;
}

/**
 * Detect a "Why This Matters" section.
 *
 * The raw content is scanned, case-insensitively, for any one of five
 * English or Swedish marker phrases. The first marker that matches settles
 * the answer.
 *
 * @param {string} content - HTML content of article
 * @returns {boolean} True if any marker phrase occurs in the content
 */
function hasWhyThisMatters(content) {
  const sectionMarkers = [
    /why\s+this\s+matters/i,
    /varför\s+detta\s+betyder/i,
    /betydelse/i,
    /implications/i,
    /konsekvenser/i
  ];

  for (const marker of sectionMarkers) {
    if (marker.test(content)) {
      return true;
    }
  }

  return false;
}

/**
 * Detect historical context.
 *
 * Works on the lower-cased plain text of the article and looks for an
 * explicit historical marker word or a four-digit number introduced by
 * "in" or "since". Because the text is lower-cased first, the patterns
 * without the `i` flag still match capitalized input such as "In 2020".
 *
 * @param {string} content - HTML content of article
 * @returns {boolean} True if historical context present
 */
function hasHistoricalContext(content) {
  const plainText = stripHtml(content).toLowerCase();

  const historicalMarkers = [
    /historically/i,
    /in \d{4}/,
    /since \d{4}/,
    /tidigare/i,
    /historiskt/i
  ];

  const firstHit = historicalMarkers.findIndex((marker) => marker.test(plainText));
  return firstHit !== -1;
}

/**
 * Detect international comparison.
 *
 * Tests the lower-cased plain text against English and Swedish phrase pairs
 * such as "compared to ... country" or "jämfört med ... länder".
 *
 * NOTE(review): stripHtml collapses all whitespace to single spaces, so the
 * `.*` in each pattern can bridge any distance - the two halves of a pair
 * only have to appear in that order somewhere in the article.
 *
 * @param {string} content - HTML content of article
 * @returns {boolean} True if international comparison present
 */
function hasInternationalComparison(content) {
  const plainText = stripHtml(content).toLowerCase();

  const comparisonMarkers = [
    /compared to.*country/i,
    /international.*comparison/i,
    /other.*countries/i,
    /jämfört med.*länder/i,
    /internationell.*jämförelse/i
  ];

  for (const marker of comparisonMarkers) {
    if (marker.test(plainText)) {
      return true;
    }
  }

  return false;
}

/**
 * Reduce HTML to plain text for keyword analysis.
 *
 * Removes, in order: HTML comments, whole script and style elements
 * (including their bodies, which are code or data rather than article
 * prose), and every remaining tag. Runs of whitespace are then collapsed to
 * single spaces and the result is trimmed.
 *
 * This is a text-extraction helper for scoring only. It is not an HTML
 * sanitizer and its output must not be treated as safe markup. Character
 * entities such as &amp;nbsp; are left as they are.
 *
 * @param {string} html - HTML content
 * @returns {string} Plain text ('' for null or undefined input)
 */
function stripHtml(html) {
  if (html == null) {
    return '';
  }

  return String(html)
    // Comments first: they may contain '>' and would otherwise be cut short.
    .replace(/<!--[\s\S]*?-->/g, ' ')
    // Drop script/style elements together with their contents.
    .replace(/<(script|style)\b[^>]*>[\s\S]*?<\/\1\s*>/gi, ' ')
    .replace(/<[^>]*>/g, ' ')
    .replace(/\s+/g, ' ')
    .trim();
}

/**
 * Calculate overall quality score.
 *
 * Each metric is first mapped onto the 0-1 range and then multiplied by its
 * weight; the weighted values are summed and the total is capped at 1.0.
 *
 * Weights:
 * - Analytical depth: 30% (used as-is, expected to be 0-1 already)
 * - Party perspectives: 25% (4 or more parties count as full marks)
 * - Cross-references: 25% (3 or more references count as full marks)
 * - "Why This Matters": 10% (present or absent)
 * - Historical context: 5% (present or absent)
 * - International comparison: 5% (present or absent)
 *
 * @param {Object} metrics - Individual quality metrics
 * @returns {number} Overall score 0.0-1.0
 */
function calculateQualityScore(metrics) {
  // Map a truthy/falsy flag onto 1.0 / 0.0.
  const asUnit = (flag) => (flag ? 1.0 : 0.0);

  // [normalized value, weight] pairs, listed in the order they are summed.
  const weightedComponents = [
    [metrics.analyticalDepth, 0.30],
    [Math.min(metrics.partyCount / 4, 1.0), 0.25],
    [Math.min(metrics.crossReferences / 3, 1.0), 0.25],
    [asUnit(metrics.hasWhyThisMatters), 0.10],
    [asUnit(metrics.hasHistoricalContext), 0.05],
    [asUnit(metrics.hasInternationalComparison), 0.05]
  ];

  let total = 0.0;
  for (const [value, weight] of weightedComponents) {
    total += value * weight;
  }

  return Math.min(total, 1.0);
}

/**
 * Assess one article file and validate it against the quality thresholds.
 *
 * Reads the file synchronously, runs every detector over the raw HTML,
 * derives the composite score and then splits the findings into blocking
 * `issues` and advisory `warnings`. Only `issues` influence `passed`.
 *
 * @param {string} articlePath - Path to article HTML file
 * @param {Object} thresholds - Quality thresholds (optional); keys given here
 *   override the matching keys of DEFAULT_THRESHOLDS
 * @returns {Object} `{ passed, qualityScore, metrics, issues, warnings,
 *   thresholds, articlePath }`, or `{ passed: false, error, articlePath }`
 *   when the file does not exist
 */
export async function enhanceArticleQuality(articlePath, thresholds = {}) {
  const options = { ...DEFAULT_THRESHOLDS, ...thresholds };

  const articleExists = fs.existsSync(articlePath);
  if (!articleExists) {
    return { passed: false, error: 'Article file not found', articlePath };
  }

  const html = fs.readFileSync(articlePath, 'utf-8');

  // Run each detector once, in a fixed order, over the raw HTML.
  const analyticalDepth = assessAnalyticalDepth(html);
  const partyCount = countPartyPerspectives(html);
  const crossReferences = countCrossReferences(html);
  const whyThisMattersFound = hasWhyThisMatters(html);
  const historicalContextFound = hasHistoricalContext(html);
  const internationalComparisonFound = hasInternationalComparison(html);

  const metrics = {
    analyticalDepth,
    partyCount,
    crossReferences,
    hasWhyThisMatters: whyThisMattersFound,
    hasHistoricalContext: historicalContextFound,
    hasInternationalComparison: internationalComparisonFound
  };

  const qualityScore = calculateQualityScore(metrics);

  // Blocking checks as [failed, describe] pairs. The message is built lazily
  // so it is only formatted for checks that actually failed.
  const blockingChecks = [
    [
      qualityScore < options.minQualityScore,
      () => `Quality score ${qualityScore.toFixed(2)} below threshold ${options.minQualityScore}`
    ],
    [
      metrics.analyticalDepth < options.minAnalyticalDepth,
      () => `Analytical depth ${metrics.analyticalDepth.toFixed(2)} below threshold ${options.minAnalyticalDepth}`
    ],
    [
      metrics.partyCount < options.minPartySources,
      () => `Only ${metrics.partyCount} party perspectives (need ${options.minPartySources})`
    ],
    [
      metrics.crossReferences < options.minCrossReferences,
      () => `Only ${metrics.crossReferences} cross-references (need ${options.minCrossReferences})`
    ],
    [
      options.requireWhyThisMatters && !metrics.hasWhyThisMatters,
      () => 'Missing "Why This Matters" section'
    ]
  ];

  const issues = blockingChecks
    .filter(([failed]) => failed)
    .map(([, describe]) => describe());

  // Recommendations never block publication; they are reported separately.
  const advisoryChecks = [
    [
      options.recommendHistoricalContext && !metrics.hasHistoricalContext,
      'Recommended: Add historical context'
    ],
    [
      options.recommendInternationalComparison && !metrics.hasInternationalComparison,
      'Recommended: Add international comparison'
    ]
  ];

  const warnings = advisoryChecks
    .filter(([missing]) => missing)
    .map(([, message]) => message);

  const passed = issues.length === 0;

  return {
    passed,
    qualityScore,
    metrics,
    issues,
    warnings,
    thresholds: options,
    articlePath
  };
}

/**
 * Assess several articles with the same thresholds.
 *
 * Articles are processed strictly one after another, and the returned array
 * holds one result per input path in input order.
 *
 * @param {string[]} articlePaths - Array of article paths
 * @param {Object} thresholds - Quality thresholds
 * @returns {Object[]} Array of quality results
 */
export async function batchEnhanceQuality(articlePaths, thresholds = {}) {
  const assessments = [];

  for (const currentPath of articlePaths) {
    assessments.push(await enhanceArticleQuality(currentPath, thresholds));
  }

  return assessments;
}

// Export the individual assessment functions and the default thresholds so
// they can be tested and reused on their own. enhanceArticleQuality and
// batchEnhanceQuality are exported where they are declared; stripHtml,
// PARTY_VARIANTS and DOCUMENT_ID_PATTERNS are not part of this list.
export {
  assessAnalyticalDepth,
  countPartyPerspectives,
  countCrossReferences,
  hasWhyThisMatters,
  hasHistoricalContext,
  hasInternationalComparison,
  calculateQualityScore,
  DEFAULT_THRESHOLDS
};