scraper list-item.ts

100% Statements 7/7
60% Branches 6/10
100% Functions 1/1
100% Lines 6/6
Press n or j to go to the next uncovered block, b, p or k for the previous block.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59  
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3x
3x
 
2x
 
 
 
 
 
3x
 
 
 
 
 
3x
 
3x
 
 
 
 
 
 
 
 
  /**
 * @module scripts/fetch-calendar/scraper/list-item
 * @description Parser for the list-item-per-event Riksdag kalendarium markup.
 *
 * Pattern B (list-item-per-event):
 * ```html
 * <li class="calendar-list__item">
 *   <time datetime="2026-04-28T09:00:00">...</time>
 *   <span class="calendar-list__type">Utskottsmöte</span>
 *   <span class="calendar-list__organ">NU</span>
 *   <a href="/sv/...">Näringspolitik - Bredbands</a>
 * </li>
 * ```
 *
 * @author Hack23 AB
 * @license Apache-2.0
 */
 
import type { CalendarEvent } from '../types.js';
import {
  decodeHtmlEntities,
  extractDataAttr,
  extractDatetime,
  extractHeadingAndLinks,
  extractSpanText,
  normalizeAkt,
  normalizeOrgCode,
  stripTags,
} from './extractors.js';
 
/**
 * Convert one `<li>` calendar list item into a {@link CalendarEvent}.
 *
 * Field resolution (each lookup is only attempted when the previous one
 * yielded `null`/`undefined`):
 * - organ: `organ` data attribute from `attrs`, then the `organ` span in
 *   `body`, then the `committee` span, otherwise an empty string.
 * - akt: `akt` data attribute from `attrs`, then the `type` span in `body`,
 *   then the `akt` span, otherwise an empty string.
 *
 * @param attrs - Attribute text of the `<li>` element, searched for data attributes.
 * @param body - Inner markup of the `<li>` element.
 * @returns The parsed event tagged with source `'web-fallback'`, or `null`
 *   when no start datetime can be extracted from `body`.
 */
export function parseCalendarListItem(attrs: string, body: string): CalendarEvent | null {
  // An item without a start datetime cannot become an event.
  const startsAt = extractDatetime(body);
  if (!startsAt) {
    return null;
  }

  // The data attribute takes precedence; span lookups are lazy fallbacks.
  const organFromAttr = extractDataAttr(attrs, 'organ');
  const rawOrgan =
    organFromAttr ?? extractSpanText(body, 'organ') ?? extractSpanText(body, 'committee') ?? '';

  const aktFromAttr = extractDataAttr(attrs, 'akt');
  const rawAkt =
    aktFromAttr ?? extractSpanText(body, 'type') ?? extractSpanText(body, 'akt') ?? '';

  const headingAndLinks = extractHeadingAndLinks(body);

  // Entities are decoded before normalisation; the summary has its tags
  // stripped and is trimmed before its entities are decoded.
  const orgCode = normalizeOrgCode(decodeHtmlEntities(rawOrgan));
  const aktCode = normalizeAkt(decodeHtmlEntities(rawAkt));
  const plainSummary = decodeHtmlEntities(stripTags(headingAndLinks.summary).trim());

  const event: CalendarEvent = {
    dtstart: startsAt,
    org: orgCode,
    akt: aktCode,
    summary: plainSummary,
    doc_refs: headingAndLinks.docRefs,
    source: 'web-fallback',
  };
  return event;
}