rss scanner.ts

94.44% Statements 34/36
92.85% Branches 13/14
100% Functions 3/3
94.11% Lines 32/34
Press n or j to go to the next uncovered block, b, p or k for the previous block.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130  
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1x
1x
 
1x
1x
1x
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16x
 
16x
 
 
 
 
16x
 
97456x
 
16x
16x
97392x
97392x
97360x
97360x
97360x
8384x
 
97360x
 
 
 
16x
 
16x
8384x
 
 
8384x
 
8138x
8138x
 
8138x
8138x
96441x
88303x
 
 
 
 
 
 
8138x
 
 
 
 
 
 
 
 
 
 
 
 
 
37006x
 
16x
 
16x
 
  /**
 * @module Infrastructure/Rss/Scanner
 * @category Intelligence Operations / Supporting Infrastructure
 * @name News article scanner — language-aware with hreflang alternates
 *
 * @description
 * Scans `news/` (top level only — does **not** recurse into date-partitioned
 * subdirectories, matching legacy behaviour), groups files by base slug,
 * keeps only those that have a variant in the requested feed language
 * (defaulting to English), builds the alternate-language map for hreflang
 * link tags, sorts by pub date descending, and caps at `MAX_ITEMS` (50).
 * Title/description are read from the per-language article HTML so each
 * localized feed carries localized item metadata. Returns the list ready
 * to be rendered into RSS `<item>` blocks.
 *
 * Round-6 split: extracted from `scripts/generate-rss.ts`.
 *
 * @author Hack23 AB (Infrastructure Team)
 * @license Apache-2.0
 */
 
import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';
 
import type { Language } from '../types/language.js';
 
import { extractArticleMeta } from './article-meta.js';
 
// ES modules have no built-in `__filename` / `__dirname`; derive both from
// this module's own URL so paths resolve relative to the source file.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

/** Site origin prepended to every absolute article URL placed in the feed. */
const BASE_URL = 'https://riksdagsmonitor.com';
/** Directory scanned for article HTML: `news/`, two levels above this module's directory. */
const NEWS_DIR = path.join(__dirname, '..', '..', 'news');
/** Upper bound on the number of items returned for a single feed. */
const MAX_ITEMS = 50;
 
/**
 * One entry of an RSS feed, anchored on a single language variant of an
 * article and carrying links to its sibling variants in other languages.
 */
export interface RssArticle {
  /** File name (inside the news directory) of the variant this item points at. */
  file: string;
  /** Item title as extracted from that variant's HTML. */
  title: string;
  /** Item description as extracted from that variant's HTML. */
  description: string;
  /** Absolute URL of the variant: site origin + `/news/` + `file`. */
  link: string;
  /** Publication date string from the article metadata; parsed with `new Date(...)` for sorting. */
  pubDate: string;
  /** File name with the trailing `-<lang>.html` removed; shared by all variants of one article. */
  baseSlug: string;
  /** Language of this item (the language the feed was requested in). */
  lang: Language;
  /** Author as reported by the article metadata. */
  author: string;
  /** Category as reported by the article metadata. */
  category: string;
  /** Every other language variant found for the same `baseSlug`, as absolute URLs. */
  alternateLanguages: { lang: Language; href: string }[];
}
 
/**
 * Collect the news articles to publish in the RSS feed for `feedLang`.
 *
 * Files directly inside the news directory are grouped by base slug (the
 * file name without its `-<lang>.html` suffix). A group yields an item only
 * when it contains a `feedLang` variant, so `<link>`/`<guid>` always point
 * at a file that exists; title, description, date, author and category are
 * taken from that variant via `extractArticleMeta`. The remaining variants
 * of the group become the item's `alternateLanguages` (hreflang siblings).
 *
 * Items are ordered newest first. An item whose `pubDate` cannot be parsed
 * is treated as having timestamp 0 (the Unix epoch), so it sorts
 * deterministically instead of making the comparator return `NaN`.
 * Items with equal timestamps keep their discovery order.
 *
 * @param feedLang - Language of the feed to build. Defaults to `'en'`.
 * @returns At most `MAX_ITEMS` articles, newest first. Empty when the news
 *   directory does not exist.
 */
export function getRssArticles(feedLang: Language = 'en'): RssArticle[] {
  console.log(`📰 Scanning news directory for RSS articles (${feedLang})...`);

  if (!fs.existsSync(NEWS_DIR)) {
    console.warn('⚠️ News directory not found');
    return [];
  }

  // Built once instead of once per file. Captures: [1] base slug, [2] language code.
  const variantPattern = /^(.+?)-(en|sv|da|no|fi|de|fr|es|nl|ar|he|ja|ko|zh)\.html$/;

  // Top-level HTML files only; index pages are listings, not articles.
  const files = fs
    .readdirSync(NEWS_DIR)
    .filter((file) => file.endsWith('.html') && file !== 'index.html' && !file.startsWith('index_'));

  // base slug -> (language -> file name)
  const articleGroups = new Map<string, Map<Language, string>>();
  for (const file of files) {
    const match = variantPattern.exec(file);
    if (!match) continue;

    const [, baseSlug, langCode] = match;
    // Both groups are mandatory in the pattern; the guard only narrows the types.
    if (baseSlug === undefined || langCode === undefined) continue;

    let variants = articleGroups.get(baseSlug);
    if (!variants) {
      variants = new Map<Language, string>();
      articleGroups.set(baseSlug, variants);
    }
    // The pattern restricts langCode to the alternation above, which is
    // assumed to match the members of `Language`.
    variants.set(langCode as Language, file);
  }

  // Pair each article with its parsed timestamp so every date is parsed once,
  // not on every comparison.
  const dated: Array<{ article: RssArticle; timestamp: number }> = [];

  for (const [baseSlug, langMap] of articleGroups) {
    const primaryFile = langMap.get(feedLang);
    // Only emit an item when the requested language variant exists on
    // disk — guarantees the feed never links to a missing page.
    if (!primaryFile) continue;

    const meta = extractArticleMeta(path.join(NEWS_DIR, primaryFile));

    const alternates: Array<{ lang: Language; href: string }> = [];
    for (const [lang, altFile] of langMap) {
      if (lang !== feedLang) {
        alternates.push({ lang, href: `${BASE_URL}/news/${altFile}` });
      }
    }

    const parsed = new Date(meta.pubDate).getTime();

    dated.push({
      article: {
        file: primaryFile,
        title: meta.title,
        description: meta.description,
        link: `${BASE_URL}/news/${primaryFile}`,
        pubDate: meta.pubDate,
        baseSlug,
        lang: feedLang,
        author: meta.author,
        category: meta.category,
        alternateLanguages: alternates,
      },
      // An unparseable date yields NaN, which would make the comparator
      // inconsistent; fall back to 0 so the ordering is well defined.
      timestamp: Number.isNaN(parsed) ? 0 : parsed,
    });
  }

  // Newest first. Array.prototype.sort is stable, so ties keep discovery order.
  dated.sort((a, b) => b.timestamp - a.timestamp);
  const articles = dated.map((entry) => entry.article);

  console.log(`  Found ${articles.length} ${feedLang} articles with multi-language alternates`);

  return articles.slice(0, MAX_ITEMS);
}