All files / scripts/agentic/gate-shared markdown-helpers.ts

100% Statements 39/39
100% Branches 18/18
100% Functions 3/3
100% Lines 35/35

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96                                10x     10x     10x     10x                 29x 29x   29x 1453x 8x 8x   1445x 51x 8x   51x   1394x     29x                     110x 1753x 1753x   448x 448x   19x                 42x 42x 42x 42x 675x 675x 159x 159x 130x 35x 35x     611x   42x    
/**
 * @module scripts/agentic/gate-shared/markdown-helpers
 * @description Shared markdown parsing primitives used by multiple gate
 *              checks (SWOT evidence, significance scoring, executive-brief
 *              H1 extraction, methodology-reflection section walker).
 *
 * Keeping these in one file (instead of duplicating the regexes across
 * five check modules) ensures the bash gate / TS gate parity rules stay
 * consistent — any change here propagates to every consumer.
 *
 * @see .github/prompts/05-analysis-gate.md — bash gate parity reference
 * @author Hack23 AB
 * @license Apache-2.0
 */
 
/**
 * Bullet lines (`-` or `*` style).
 *
 * Matches optional leading whitespace, a single `-` or `*` marker, and at
 * least one whitespace character after it. `+` markers and ordered-list
 * markers (`1.`) are not matched.
 */
export const BULLET_RE = /^\s*[-*]\s+/;

/**
 * Table row (starts with `|`).
 *
 * Leading whitespace is allowed; only the first non-whitespace character is
 * inspected, so separator rows that begin with `|` match as well.
 */
export const TABLE_ROW_RE = /^\s*\|/;

/**
 * Table separator row (only `|`, `:`, `-`, whitespace).
 *
 * The whole line must consist of those characters and contain at least one
 * of them. NOTE(review): because whitespace is part of the allowed set, a
 * non-empty whitespace-only line also matches — presumably callers test
 * `TABLE_ROW_RE` first; confirm against the consumers.
 */
export const TABLE_SEP_RE = /^\s*[|:\-\s]+$/;

/**
 * Any ATX heading H1-H6 (resets the active section context).
 *
 * Requires one to six `#` characters at the very start of the line followed
 * by whitespace; indented headings and seven or more `#` do not match.
 */
export const ANY_HEADING_RE = /^#{1,6}\s+/;
 
/**
 * Reduce a heading value to its plain, visible text.
 *
 * Processing steps, in order:
 *  1. Everything from a `<` up to and including the next `>` is discarded.
 *     A `<` that is never closed discards the remainder of the value.
 *  2. The non-breaking-space entities `&nbsp;` and `&#160;` (matched
 *     case-insensitively) are replaced by a regular space. No other entity
 *     is decoded.
 *  3. Runs of whitespace collapse to one space and the result is trimmed.
 *
 * This lets the executive-brief H1 extractor treat Markdown `# …` headings
 * and centered `<h1>…</h1>` template blocks the same way.
 *
 * @param value - Raw heading text, possibly containing inline HTML.
 * @returns The normalised text; an empty string when nothing visible remains.
 */
export function stripHeadingMarkup(value: string): string {
  const visibleParts: string[] = [];
  let cursor = 0;

  while (cursor < value.length) {
    const tagStart = value.indexOf('<', cursor);
    if (tagStart === -1) {
      // No further tags: the rest of the value is visible text.
      visibleParts.push(value.slice(cursor));
      break;
    }
    visibleParts.push(value.slice(cursor, tagStart));

    const tagEnd = value.indexOf('>', tagStart + 1);
    // Unterminated tag: everything after the `<` is swallowed.
    if (tagEnd === -1) break;
    cursor = tagEnd + 1;
  }

  const withoutTags = visibleParts.join('');
  const withPlainSpaces = withoutTags.replace(/&nbsp;|&#160;/gi, ' ');
  return withPlainSpaces.replace(/\s+/g, ' ').trim();
}
 
/**
 * Test whether a markdown document contains an H2-H4 heading whose visible
 * text matches the given pattern.
 *
 * Only lines that start with two to four `#` followed by whitespace are
 * considered; an optional closing `#` run is ignored. Before matching, every
 * leading character that is neither a Unicode letter nor a digit (emoji,
 * symbols, punctuation, spaces) is removed from the heading text. Anchoring
 * to a real heading prevents the loose "anywhere in the file" matches that
 * earlier versions of the methodology-reflection gate allowed.
 *
 * The check is stateless with respect to `pattern`: a regex carrying the
 * `g` or `y` flag gives the same answer on every call and its `lastIndex`
 * is left unchanged.
 *
 * @param content - Full markdown document; lines are split on `\n`.
 * @param pattern - Pattern applied to each normalised heading text.
 * @returns `true` as soon as one heading matches, otherwise `false`.
 */
export function hasHeading(content: string, pattern: RegExp): boolean {
  for (const rawLine of content.split('\n')) {
    const headingMatch = rawLine.match(/^#{2,4}\s+(.*?)\s*#*\s*$/);
    if (!headingMatch) continue;
    // Drop the leading run of non-letter, non-digit characters (emoji, symbols, spaces).
    const text = headingMatch[1]!.replace(/^[^\p{L}\p{N}]+\s*/u, '').trim();
    // `search` always scans from index 0 and restores `lastIndex`, unlike
    // `test`, which resumes from a stale `lastIndex` on /g and /y patterns
    // and can therefore miss a heading on a repeated call.
    if (text.search(pattern) !== -1) return true;
  }
  return false;
}
 
/**
 * Return the body of the section starting at the H2 heading that matches
 * `headingPattern`, up to (but not including) the next H2 heading. Returns
 * an empty string when the section is not present.
 *
 * Only lines beginning with exactly `##` followed by whitespace act as
 * section boundaries; deeper headings (`###` and below) stay inside the
 * body. The heading text is normalised before matching: an optional closing
 * `#` run is ignored and every leading character that is neither a Unicode
 * letter nor a digit (emoji, symbols, spaces) is removed. The heading line
 * itself is not part of the result; body lines are re-joined with `\n`
 * exactly as they appeared.
 *
 * The check is stateless with respect to `headingPattern`: a regex carrying
 * the `g` or `y` flag gives the same result on every call and its
 * `lastIndex` is left unchanged.
 *
 * @param content - Full markdown document; lines are split on `\n`.
 * @param headingPattern - Pattern applied to each normalised H2 heading text.
 * @returns The section body, or `''` when no H2 heading matches.
 */
export function extractSection(content: string, headingPattern: RegExp): string {
  const lines = content.split('\n');
  let inSection = false;
  const collected: string[] = [];
  for (const line of lines) {
    const headingMatch = line.match(/^##\s+(.*?)\s*#*\s*$/);
    if (headingMatch) {
      // Any H2 reached while collecting terminates the section.
      if (inSection) break;
      // Drop the leading run of non-letter, non-digit characters (emoji, symbols, spaces).
      const text = headingMatch[1]!.replace(/^[^\p{L}\p{N}]+\s*/u, '').trim();
      // `search` always scans from index 0 and restores `lastIndex`, unlike
      // `test`, which resumes from a stale `lastIndex` on /g and /y patterns
      // and can therefore skip the wanted heading on a repeated call.
      if (text.search(headingPattern) !== -1) {
        inSection = true;
        continue;
      }
    }
    if (inSection) collected.push(line);
  }
  return collected.join('\n');
}