Press n or j to go to the next uncovered block, b, p or k for the previous block.
| 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 | 42x 42x 61454x 61363x 84014x 42x 175568x 169170x 169170x 320x 320x 320x 169170x 46x 46x 46x 169170x 871x 169170x | /**
* @module Infrastructure/HTMLSanitization
* @description XSS-safe HTML entity escaping and decoding utilities.
* Bounded context: Infrastructure / Security
*
* @author Hack23 AB
* @license Apache-2.0
*/
/**
 * Replacement table for the five characters that are significant in HTML
 * text and attribute contexts. Each key is the raw character and each value
 * is the entity reference that is emitted in its place.
 *
 * The apostrophe uses the numeric form `&#39;` rather than `&apos;`, because
 * the named form is not defined in HTML 4.
 */
const HTML_ENTITY_MAP: Readonly<Record<string, string>> = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
  "'": '&#39;',
} as const;

/** Matches every character that has an entry in HTML_ENTITY_MAP. */
const HTML_ESCAPE_PATTERN = /[&<>"']/g;

/**
 * Escape HTML special characters for safe inclusion in HTML/JSON-LD.
 * Prevents XSS by converting &, <, >, " and ' to their HTML entity
 * equivalents. The input is not inspected for existing entities, so text
 * that is already escaped will be escaped a second time.
 *
 * @param text - Raw text to escape; numbers are stringified first
 * @returns Escaped text safe for HTML insertion; empty string for any falsy
 *   input (null, undefined, '', and also 0 and NaN)
 */
export function escapeHtml(text: string | null | undefined | number): string {
  if (!text) return '';
  return String(text).replace(
    HTML_ESCAPE_PATTERN,
    // The fallback to the matched character is only reachable if the pattern
    // and the map ever drift apart.
    (m: string): string => HTML_ENTITY_MAP[m] ?? m,
  );
}
/**
 * Map of named HTML entity references (including the leading `&` and the
 * trailing `;`) to the characters they stand for. The keys must stay in sync
 * with the names listed in HTML_ENTITY_REFERENCE_PATTERN.
 */
const NAMED_ENTITY_MAP: Readonly<Record<string, string>> = {
  '&amp;': '&',
  '&lt;': '<',
  '&gt;': '>',
  '&quot;': '"',
  '&apos;': "'",
  '&nbsp;': '\u00a0',
  '&mdash;': '\u2014',
  '&ndash;': '\u2013',
  '&lsquo;': '\u2018',
  '&rsquo;': '\u2019',
  '&ldquo;': '\u201c',
  '&rdquo;': '\u201d',
  '&bull;': '\u2022',
} as const;

/**
 * Matches one entity reference of any supported kind:
 * group 1 captures the digits of a decimal reference (`&#228;`),
 * group 2 captures the digits of a hexadecimal reference (`&#xE4;`),
 * and neither group is set for a supported named reference (`&amp;`).
 */
const HTML_ENTITY_REFERENCE_PATTERN =
  /&(?:#(\d+)|#x([0-9a-fA-F]+)|(?:amp|lt|gt|quot|apos|nbsp|mdash|ndash|lsquo|rsquo|ldquo|rdquo|bull));/g;

/**
 * Convert a code point to its string form, returning `fallback` when the
 * value is not a valid Unicode code point (String.fromCodePoint throws a
 * RangeError in that case).
 */
function codePointToString(codePoint: number, fallback: string): string {
  try {
    return String.fromCodePoint(codePoint);
  } catch {
    return fallback; // Keep invalid entities as-is
  }
}

/**
 * Decode HTML numeric and named entities to their UTF-8 characters.
 * Converts `&#228;` → `ä`, `&#xE4;` → `ä`, `&amp;` → `&`, etc.
 *
 * Use this to normalize text extracted from HTML before further processing,
 * preventing double-escaping when the text is later passed through escapeHtml().
 *
 * All entity kinds are decoded in a single pass, so the output of one
 * replacement is never re-scanned: `&#38;lt;` decodes to the literal text
 * `&lt;`, not to `<`. Unsupported named entities and numeric references
 * outside the Unicode range are left untouched.
 *
 * @param text - Text potentially containing HTML entities
 * @returns Text with entities decoded to UTF-8; empty string for falsy input
 */
export function decodeHtmlEntities(text: string | null | undefined): string {
  if (!text) return '';
  return text.replace(
    HTML_ENTITY_REFERENCE_PATTERN,
    (match: string, decimal: string | undefined, hex: string | undefined): string => {
      if (decimal !== undefined) {
        return codePointToString(parseInt(decimal, 10), match);
      }
      if (hex !== undefined) {
        return codePointToString(parseInt(hex, 16), match);
      }
      return NAMED_ENTITY_MAP[match] ?? match;
    },
  );
}
|