All files / scripts/sitemap-xml/scanners news.ts

93.93% Statements 31/33
85% Branches 17/20
100% Functions 4/4
93.54% Lines 29/31

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96                                                  2x 2x   2x                           14x   14x           14x     40166x 42x 39270x 28x 39242x 38864x 38864x 38864x 38864x 38864x 38864x 38864x     38864x 38864x   38864x 4550x           34314x 34314x 14x       38864x           14x   14x   4536x    
/**
 * @module Infrastructure/SitemapXml/Scanners/News
 * @category Intelligence Operations / Supporting Infrastructure
 * @name News article scanner — base-slug grouped
 *
 * @description
 * Walks `news/` recursively and groups files by their base slug (without
 * the language suffix) so each article becomes a single `ArticleGroup`
 * with a list of available languages and a unified `lastmod`. The unified
 * `lastmod` is the **maximum** git timestamp across the article's
 * language variants. Sorted alphabetically by base slug for stable XML
 * output.
 *
 * Round-6 split: extracted from `scripts/generate-sitemap.ts`.
 *
 * @author Hack23 AB (Infrastructure Team)
 * @license Apache-2.0
 */
 
import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';
 
import { getFileModTime } from '../git-timestamps.js';
 
// ES modules do not provide `__filename` / `__dirname`, so both are derived
// from this module's own URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
 
// Directory that `getNewsArticles` scans: `news/`, located three directory
// levels above the directory containing this module.
const NEWS_DIR = path.join(__dirname, '..', '..', '..', 'news');
 
/** Grouped article descriptor: one entry per base slug across all languages. */
export interface ArticleGroup {
  /**
   * File name without its `-{lang}.html` suffix, prefixed with the file's
   * directory relative to the news root using `/` separators
   * (e.g. "2026/02/2026-02-13-article").
   */
  baseSlug: string;
  /** Language codes taken from the file-name suffix of each variant found for this article. */
  languages: string[];
  /**
   * Modification timestamp as returned by `getFileModTime`; when the language
   * variants differ, the latest one wins.
   */
  lastmod: string;
}
 
/**
 * Get news articles with metadata, grouped by base slug.
 *
 * Walks `NEWS_DIR` recursively, so the date-based subdirectory structure
 * `news/{year}/{month}/article-{lang}.html` is supported. Only `.html` files
 * whose name ends in a recognised `-{lang}` suffix are considered;
 * `index.html` and `index_*` files are ignored. All language variants of the
 * same article (same directory, same name without the language suffix) are
 * merged into a single `ArticleGroup` whose `lastmod` is the most recent
 * timestamp reported by `getFileModTime` for any of its variants.
 *
 * Progress is reported on the console.
 *
 * @returns One `ArticleGroup` per article, sorted by `baseSlug` with
 *   `localeCompare`; an empty array when the news directory does not exist.
 */
export function getNewsArticles(): ArticleGroup[] {
  console.log('📰 Scanning news directory...');

  if (!fs.existsSync(NEWS_DIR)) {
    console.warn('⚠️ News directory not found');
    return [];
  }

  // Group articles by base slug (without language suffix)
  const articles = new Map<string, ArticleGroup>();

  // Built once instead of once per file. Group 1 is the base slug, group 2
  // the language code. No `g` flag, so the pattern carries no state between
  // calls.
  const articleFilePattern = /^(.+?)-(en|sv|da|no|fi|de|fr|es|nl|ar|he|ja|ko|zh)\.html$/;

  /** Recursively collect article files below `dir` into `articles`. */
  function scanDir(dir: string): void {
    // Sort entries by name so the traversal order (and therefore the order of
    // each group's `languages`) does not depend on the file system.
    const entries = fs.readdirSync(dir, { withFileTypes: true }).sort((a, b) => a.name.localeCompare(b.name));

    for (const entry of entries) {
      if (entry.isDirectory()) {
        scanDir(path.join(dir, entry.name));
        continue;
      }

      const file = entry.name;
      const isArticleCandidate =
        entry.isFile() && file !== 'index.html' && !file.startsWith('index_') && file.endsWith('.html');
      if (!isArticleCandidate) {
        continue;
      }

      const match = articleFilePattern.exec(file);
      if (!match) {
        // No recognised language suffix: not part of any article group.
        continue;
      }

      // Both capture groups are mandatory in the pattern, so they are always
      // present once the match succeeded.
      const baseSlug = match[1]!;
      const lang = match[2]!;
      const fileModTime = getFileModTime(path.join(dir, file));

      // Include subdirectory prefix in baseSlug (e.g., "2026/02/2026-02-13-article")
      const relDir = path.relative(NEWS_DIR, dir).split(path.sep).join('/');
      const fullBaseSlug = relDir ? `${relDir}/${baseSlug}` : baseSlug;

      const group = articles.get(fullBaseSlug);
      if (group === undefined) {
        // First variant seen for this article: it seeds the group.
        articles.set(fullBaseSlug, {
          baseSlug: fullBaseSlug,
          languages: [lang],
          lastmod: fileModTime,
        });
        continue;
      }

      // Keep the newest timestamp across variants; an empty `lastmod` is
      // always replaced.
      if (!group.lastmod || new Date(fileModTime) > new Date(group.lastmod)) {
        group.lastmod = fileModTime;
      }
      group.languages.push(lang);
    }
  }

  scanDir(NEWS_DIR);

  console.log(`  Found ${articles.size} news article groups`);

  return Array.from(articles.values()).sort((a, b) => a.baseSlug.localeCompare(b.baseSlug));
}