csv-parser: add _validate_mapping helper
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
8dcf662945
commit
f44b77df6f
2 changed files with 106 additions and 0 deletions
|
|
@ -31,6 +31,10 @@ from app.services.csv_import import CSVImportError
|
|||
# Real broker preambles are typically 1-10 lines.
|
||||
# NOTE(review): presumably the upper bound on how many leading lines are
# scanned when looking for a broker preamble; the code that reads this
# constant is not visible in this hunk — confirm against _detect_dialect.
_MAX_PREAMBLE_SCAN = 30
|
||||
|
||||
# Required and optional keys in the LLM-returned column mapping.
|
||||
# Mapping keys that _validate_mapping rejects when missing or falsy
# (None, empty string, ...).
_REQUIRED_MAPPING_KEYS = ("ticker_col", "qty_col")
|
||||
# Mapping keys that may be absent or None; when one is set,
# _validate_mapping still requires the named column to exist in the headers.
_OPTIONAL_MAPPING_KEYS = ("name_col", "cost_col", "currency_col")
|
||||
|
||||
|
||||
class LLMParseError(CSVImportError):
|
||||
"""Raised when the LLM call fails or returns an unusable mapping.
|
||||
|
|
@ -124,3 +128,49 @@ def _detect_dialect(raw: bytes) -> tuple[str, int]:
|
|||
# Next row is also all-alpha → keep scanning
|
||||
break
|
||||
return delimiter, 0
|
||||
|
||||
|
||||
def _validate_mapping(
    mapping: dict, headers: list[str], first_row: list[str],
) -> None:
    """Verify the LLM-returned mapping is sane.

    - ``ticker_col`` and ``qty_col`` are required (present and truthy).
    - Every named column must be a string that exists in ``headers``.
    - The value at ``qty_col`` on ``first_row`` must parse as a number.
    - The value at ``cost_col`` on ``first_row`` must parse as a number
      when ``cost_col`` is mapped and the cell is non-empty; an empty
      cost cell is accepted.

    A ``first_row`` shorter than ``headers`` is treated as having empty
    cells for the missing trailing positions.

    Raises ``LLMParseError`` on any failure, with a message that names
    the specific problem (helpful for log forensics and for the
    user-facing 400)."""
    for key in _REQUIRED_MAPPING_KEYS:
        if not mapping.get(key):
            raise LLMParseError(
                f"LLM mapping missing required column: {key.replace('_col', '')}"
            )

    headers_set = set(headers)
    for key in _REQUIRED_MAPPING_KEYS + _OPTIONAL_MAPPING_KEYS:
        col = mapping.get(key)
        if col is None:
            continue
        # The mapping is untrusted LLM output. A non-string value (for
        # example a JSON list or object) can never name a header, and an
        # unhashable one would make the set-membership test raise
        # TypeError instead of the documented LLMParseError.
        if not isinstance(col, str) or col not in headers_set:
            raise LLMParseError(
                f"LLM mapping references unknown column: {col!r}"
            )

    # Numeric sanity check: qty and (if present) cost must parse on row 1.
    # With duplicate header names the last occurrence wins.
    header_index = {h: i for i, h in enumerate(headers)}

    def cell_for(col: str) -> str:
        """Return the first-row cell under ``col``, or "" if the row is short."""
        idx = header_index[col]
        return first_row[idx] if idx < len(first_row) else ""

    qty_col = mapping["qty_col"]
    qty_value = cell_for(qty_col)
    if not _looks_numeric(qty_value):
        raise LLMParseError(
            f"LLM mapping qty_col={qty_col!r} maps to non-numeric value {qty_value!r}"
        )

    cost_col = mapping.get("cost_col")
    if cost_col is not None:
        cost_value = cell_for(cost_col)
        # An empty cost cell is tolerated; only a non-empty, non-numeric
        # value indicates the wrong column was chosen.
        if cost_value and not _looks_numeric(cost_value):
            raise LLMParseError(
                f"LLM mapping cost_col={cost_col!r} maps to non-numeric value {cost_value!r}"
            )
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue