Press n or j to go to the next uncovered block, b, p or k for the previous block.
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 | 3x 3x 2x 3x 3x 3x | /**
* @module scripts/fetch-calendar/scraper/list-item
* @description Parser for the list-item-per-event Riksdag kalendarium markup.
*
* Pattern B (list-item-per-event):
* ```html
* <li class="calendar-list__item">
* <time datetime="2026-04-28T09:00:00">...</time>
* <span class="calendar-list__type">Utskottsmöte</span>
* <span class="calendar-list__organ">NU</span>
* <a href="/sv/...">Näringspolitik - Bredbands</a>
* </li>
* ```
*
* @author Hack23 AB
* @license Apache-2.0
*/
import type { CalendarEvent } from '../types.js';
import {
decodeHtmlEntities,
extractDataAttr,
extractDatetime,
extractHeadingAndLinks,
extractSpanText,
normalizeAkt,
normalizeOrgCode,
stripTags,
} from './extractors.js';
/**
 * Build a calendar event from a single `<li>` calendar list item.
 *
 * Field lookup order (the first source that yields a non-nullish value wins):
 * - `org`: `organ` data attribute, then the `organ` span, then the `committee` span.
 * - `akt`: `akt` data attribute, then the `type` span, then the `akt` span.
 *
 * Both fall back to the empty string before being entity-decoded and normalized.
 *
 * @param attrs - Attribute text of the `<li>` element, consulted for data attributes.
 * @param body - Markup inside the `<li>`, consulted for the datetime, spans, heading and links.
 * @returns The parsed event tagged with source `'web-fallback'`, or `null`
 *   when `extractDatetime` produces no usable start time for `body`.
 */
export function parseCalendarListItem(attrs: string, body: string): CalendarEvent | null {
  const startsAt = extractDatetime(body);
  if (!startsAt) {
    // Without a start time the item cannot be represented as an event.
    return null;
  }

  // Data attributes take precedence over span text; later lookups only run
  // when every earlier one came back nullish.
  const rawOrg =
    extractDataAttr(attrs, 'organ') ?? extractSpanText(body, 'organ') ?? extractSpanText(body, 'committee') ?? '';
  const rawAkt =
    extractDataAttr(attrs, 'akt') ?? extractSpanText(body, 'type') ?? extractSpanText(body, 'akt') ?? '';

  const { summary: headingMarkup, docRefs: linkedDocs } = extractHeadingAndLinks(body);

  const org = normalizeOrgCode(decodeHtmlEntities(rawOrg));
  const akt = normalizeAkt(decodeHtmlEntities(rawAkt));
  // Tags are stripped and whitespace trimmed before entities are decoded.
  const summary = decodeHtmlEntities(stripTags(headingMarkup).trim());

  return {
    dtstart: startsAt,
    org,
    akt,
    summary,
    doc_refs: linkedDocs,
    source: 'web-fallback',
  };
}
|