All files / src/browser/cia csv-utils.ts

98.21% Statements 55/56
90.32% Branches 28/31
100% Functions 7/7
100% Lines 47/47

Press n or j to go to the next uncovered block, b, p or k for the previous block.

1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156                                                                        57x 57x   272x 56x   56x 165x 165x     165x 165x 165x 165x 4538x 4538x 60x 4478x 761x 761x   3717x     165x   165x 165x 926x 926x 926x   165x   165x                                 85x 84x 83x 83x 83x                                                     38x 38x 37x     38x 43x 43x 42x 34x 34x 34x   1x 1x       4x 4x                           20x 36x    
/**
 * @module CIA/CSVUtils
 * @category Intelligence Platform - Data Acquisition & Pipeline Management
 *
 * @description
 * CSV parsing and HTTP loading helpers for the CIA data pipeline.
 *
 * Originally lived as private/public methods on `CIADataLoader`; extracted as
 * free functions so each per-domain loader (`loaders/*.ts`) can use them
 * directly without instantiating the orchestrator class. Mockable in tests
 * by stubbing `globalThis.fetch`.
 *
 * @author Hack23 AB - Data Pipeline Engineering
 * @license Apache-2.0
 * @since 2026
 */
 
import type { CSVRow } from './types.js';
 
/**
 * Signature used by per-domain loaders to fetch and parse a single CSV.
 *
 * @param localPath - Path of the CSV to load from the primary source
 * @param fallbackPath - Optional alternative path to use on the fallback source
 * @returns Promise resolving to the parsed rows
 */
export type LoadCSV = (
  localPath: string,
  fallbackPath?: string
) => Promise<CSVRow[]>;
 
/**
 * Split one CSV record into trimmed cell values.
 *
 * Commas inside a double-quoted section do not split the record, the quoting
 * characters themselves are dropped, and a doubled quote (`""`) inside a
 * quoted section yields one literal `"`.
 *
 * @param line - A single CSV line without its line terminator
 * @returns Cell values in column order, each trimmed of surrounding whitespace
 */
function splitCSVLine(line: string): string[] {
  const cells: string[] = [];
  let current = '';
  let inQuotes = false;

  for (let i = 0; i < line.length; i++) {
    const ch = line.charAt(i);
    if (ch === '"') {
      if (inQuotes && line.charAt(i + 1) === '"') {
        // Escaped quote inside a quoted field: keep one literal quote and
        // skip the second character of the pair.
        current += '"';
        i++;
      } else {
        inQuotes = !inQuotes;
      }
    } else if (ch === ',' && !inQuotes) {
      cells.push(current.trim());
      current = '';
    } else {
      current += ch;
    }
  }
  cells.push(current.trim());
  return cells;
}

/**
 * Parse CSV text into an array of header-keyed rows with auto-typed values.
 *
 * Handles basic quoting (`"value, with, commas"`, `""` as an escaped quote)
 * in both the header row and data rows, and converts cells that `Number()`
 * accepts into numbers. Empty or missing cells are kept as the empty string.
 * Blank lines are skipped. Records are split on `\n`; a trailing `\r` is
 * removed by trimming each line, so CRLF input is accepted. Quoted fields
 * that span several lines are not supported.
 *
 * @param csvText - Raw CSV text
 * @returns Parsed rows (empty array if header-only or empty)
 */
export function parseCSV(csvText: string): CSVRow[] {
  const lines = csvText.trim().split('\n');
  const headerLine = lines[0];
  if (lines.length < 2 || headerLine === undefined) return [];

  const headers = splitCSVLine(headerLine.trim());
  const rows: CSVRow[] = [];

  for (const rawLine of lines.slice(1)) {
    const line = rawLine.trim();
    if (!line) continue; // tolerate blank lines between records

    const values = splitCSVLine(line);
    const row: CSVRow = {};
    headers.forEach((header, idx) => {
      // Rows shorter than the header get '' for the missing columns.
      const val = values[idx] ?? '';
      const num = Number(val);
      row[header] = val !== '' && !Number.isNaN(num) ? num : val;
    });
    rows.push(row);
  }
  return rows;
}
 
/**
 * Concatenate a base URL and a relative path with a single `/` separator.
 *
 * Any run of trailing slashes on `base` and any run of leading slashes on
 * `path` is discarded before joining, so callers need not normalise either
 * side. When one argument is empty the other is returned untouched (no
 * slashes are trimmed or added), which suits directory listings and
 * conditionally computed paths.
 *
 * @param base - Base URL (with or without trailing slash); empty string returns `path` as-is
 * @param path - Relative path (with or without leading slash); empty string returns `base` as-is
 * @returns The two segments joined by exactly one `/`, or whichever input is non-empty when one is empty
 */
export function joinURL(base: string, path: string): string {
  // Pass-through: with either side empty there is nothing to join.
  if (!base || !path) {
    return base || path;
  }

  const segments = [
    base.replace(/\/+$/, ''),
    path.replace(/^\/+/, ''),
  ];
  return segments.join('/');
}
 
/**
 * Load a CSV with local-first fallback.
 *
 * Tries `joinURL(csvBaseURL, localPath)` first. If the response is non-OK or
 * yields zero rows, falls back to `joinURL(fallbackURL, fallbackPath ?? localPath)`
 * when a `fallbackURL` is provided. Network errors are logged as warnings
 * rather than thrown so the page can degrade gracefully when offline.
 *
 * URL joining is tolerant of missing trailing slashes on the base URL and
 * leading slashes on the path; callers do not need to worry about the slash
 * boundary.
 *
 * @param csvBaseURL - Base URL for the local-first CSV directory
 * @param fallbackURL - Optional remote fallback URL (e.g. raw.githubusercontent.com mirror); empty string disables fallback
 * @param localPath - Path relative to `csvBaseURL`
 * @param fallbackPath - Optional path on the fallback host (defaults to `localPath`)
 * @returns Parsed CSV rows; `[]` when no source returned data
 */
export async function loadCSV(
  csvBaseURL: string,
  fallbackURL: string,
  localPath: string,
  fallbackPath?: string
): Promise<CSVRow[]> {
  // Candidate URLs in priority order: local first, then the optional mirror.
  const urls: string[] = [joinURL(csvBaseURL, localPath)];
  if (fallbackURL) {
    urls.push(joinURL(fallbackURL, fallbackPath ?? localPath));
  }

  for (const url of urls) {
    try {
      const response = await fetch(url);
      // A non-OK status is not an error here; move on to the next candidate.
      if (!response.ok) continue;
      const text = await response.text();
      const rows = parseCSV(text);
      // An empty or header-only file counts as "no data": try the next URL.
      if (rows.length > 0) return rows;
    } catch (e: unknown) {
      const message = e instanceof Error ? e.message : String(e);
      console.warn(`Failed to load CSV from ${url}:`, message);
    }
  }

  console.warn(`No data loaded for ${localPath}`);
  return [];
}
 
/**
 * Create a `LoadCSV` function with its URL configuration pre-bound.
 *
 * The returned closure forwards `localPath` / `fallbackPath` to `loadCSV`
 * together with the captured `csvBaseURL` and `fallbackURL`, so per-domain
 * loaders can be handed a loader (and unit-tested) without knowing the URL
 * configuration of any particular environment.
 *
 * @param csvBaseURL - Base URL for the local-first CSV directory
 * @param fallbackURL - Optional remote fallback URL (empty string disables fallback)
 * @returns A `LoadCSV` function bound to those URLs
 */
export function createLoadCSV(csvBaseURL: string, fallbackURL = ''): LoadCSV {
  const boundLoader: LoadCSV = (localPath, fallbackPath) => {
    return loadCSV(csvBaseURL, fallbackURL, localPath, fallbackPath);
  };
  return boundLoader;
}