csv-parser: add _validate_mapping helper

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Giorgio Gilestro 2026-05-27 12:16:26 +02:00
parent 8dcf662945
commit f44b77df6f
2 changed files with 106 additions and 0 deletions

View file

@ -31,6 +31,10 @@ from app.services.csv_import import CSVImportError
# Real broker preambles are typically 1-10 lines.
_MAX_PREAMBLE_SCAN = 30
# Required and optional keys in the LLM-returned column mapping.
# Each value is expected to name one of the CSV headers.
# Required keys must be present and non-empty; optional keys may be
# absent or None, but when set they must name an existing header
# (both rules are enforced by ``_validate_mapping``).
_REQUIRED_MAPPING_KEYS = ("ticker_col", "qty_col")
_OPTIONAL_MAPPING_KEYS = ("name_col", "cost_col", "currency_col")
class LLMParseError(CSVImportError):
"""Raised when the LLM call fails or returns an unusable mapping.
@ -124,3 +128,49 @@ def _detect_dialect(raw: bytes) -> tuple[str, int]:
# Next row is also all-alpha → keep scanning
break
return delimiter, 0
def _validate_mapping(
    mapping: dict, headers: list[str], first_row: list[str],
) -> None:
    """Verify the LLM-returned mapping is sane.

    - ``ticker_col`` and ``qty_col`` are required (non-null, non-empty).
    - Every named column must be a string that exists in ``headers``.
    - The value at ``qty_col`` on ``first_row`` must parse as a number.
    - The value at ``cost_col`` on ``first_row`` must parse as a number
      when the column is mapped and the cell is non-empty (an empty cost
      cell is accepted).

    A ``first_row`` shorter than ``headers`` is tolerated: cells past its
    end are treated as empty strings.

    Raises ``LLMParseError`` on any failure, with a message that names
    the specific problem (helpful for log forensics and for the
    user-facing 400).
    """
    for key in _REQUIRED_MAPPING_KEYS:
        if not mapping.get(key):
            raise LLMParseError(
                f"LLM mapping missing required column: {key.replace('_col', '')}"
            )

    # Header name -> position; doubles as the membership table below.
    header_index = {h: i for i, h in enumerate(headers)}

    for key in _REQUIRED_MAPPING_KEYS + _OPTIONAL_MAPPING_KEYS:
        col = mapping.get(key)
        if col is None:
            continue
        # The mapping is model output: a value may be a list or dict, which
        # is unhashable and would make the membership test raise TypeError.
        # A non-string can never name a header, so report it the same way
        # as any other unknown column.
        if not isinstance(col, str) or col not in header_index:
            raise LLMParseError(
                f"LLM mapping references unknown column: {col!r}"
            )

    def _cell(col: str) -> str:
        """Return the first-row value under ``col``, or "" if the row is short."""
        idx = header_index[col]
        return first_row[idx] if idx < len(first_row) else ""

    # Numeric sanity check: qty and (if present) cost must parse on row 1.
    qty_col = mapping["qty_col"]
    qty_value = _cell(qty_col)
    if not _looks_numeric(qty_value):
        raise LLMParseError(
            f"LLM mapping qty_col={qty_col!r} maps to non-numeric value {qty_value!r}"
        )

    cost_col = mapping.get("cost_col")
    if cost_col is not None:
        cost_value = _cell(cost_col)
        # An empty cost cell is allowed; only a non-empty, non-numeric one fails.
        if cost_value and not _looks_numeric(cost_value):
            raise LLMParseError(
                f"LLM mapping cost_col={cost_col!r} maps to non-numeric value {cost_value!r}"
            )