csv-parser: add _apply_mapping helper

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Giorgio Gilestro 2026-05-27 12:18:31 +02:00
parent f44b77df6f
commit b99f46d2fc
2 changed files with 146 additions and 1 deletions

View file

@ -21,7 +21,7 @@ import csv
import hashlib
import io
from app.services.csv_import import CSVImportError
from app.services.csv_import import CSVImportError, ParsedPie, ParsedPosition
# ---------------------------------------------------------------------------
# Module-level constants
@ -174,3 +174,80 @@ def _validate_mapping(
raise LLMParseError(
f"LLM mapping cost_col={cost_col!r} maps to non-numeric value {cost_value!r}"
)
def _parse_number(value: str) -> float | None:
"""Permissive float parse: strips thousands separators, currency
symbols, percent signs. Returns None on failure (so callers can
decide whether to skip or raise)."""
s = value.strip().replace(",", "").replace("$", "")
s = s.replace("", "").replace("£", "").replace("%", "")
if not s:
return None
try:
return float(s)
except ValueError:
return None
def _apply_mapping(
    headers: list[str],
    data_rows: list[list[str]],
    mapping: dict,
) -> ParsedPie:
    """Iterate ``data_rows`` and produce a ``ParsedPie``.

    ``mapping`` names the header to read for each field.  ``ticker_col``
    and ``qty_col`` are required keys; ``name_col`` and ``cost_col`` are
    optional and may be absent or ``None``.

    Rows that lack a ticker or a usable quantity (blank, non-numeric,
    non-finite, zero or negative) are silently skipped -- broker exports
    often include summary or placeholder rows after the position list.
    ``name_col`` falls back to the ticker symbol when null or blank.

    A position's ``invested_value`` is ``quantity * cost`` when the cost
    cell parses to a finite number, otherwise ``None``.  The pie's
    ``invested`` is the sum of those values, or ``None`` when no row
    supplied a cost.

    Raises:
        KeyError: if ``mapping`` lacks ``ticker_col``/``qty_col``, or if a
            processed row needs a mapped column that is not in ``headers``.
    """
    import math

    column_index = {header: pos for pos, header in enumerate(headers)}
    ticker_col = mapping["ticker_col"]
    qty_col = mapping["qty_col"]
    name_col = mapping.get("name_col")
    cost_col = mapping.get("cost_col")

    def cell(row: list[str], column: str) -> str:
        """Return the raw cell under ``column``; "" when the row is too short."""
        pos = column_index[column]
        return row[pos] if pos < len(row) else ""

    positions: list[ParsedPosition] = []
    invested_total = 0.0
    invested_seen = False

    for row in data_rows:
        # Entirely blank line (e.g. a spacer between sections).
        if not any(c.strip() for c in row):
            continue

        ticker = cell(row, ticker_col).strip().upper()
        if not ticker:
            continue

        qty = _parse_number(cell(row, qty_col))
        # NaN compares False against everything, so ``qty <= 0`` alone would
        # let it through; reject non-finite quantities explicitly.
        if qty is None or not math.isfinite(qty) or qty <= 0:
            continue

        avg_cost: float | None = None
        if cost_col is not None:
            avg_cost = _parse_number(cell(row, cost_col))
            # A non-finite cost would poison the running total.
            if avg_cost is not None and not math.isfinite(avg_cost):
                avg_cost = None

        invested_value: float | None = None
        if avg_cost is not None:
            invested_value = qty * avg_cost
            invested_total += invested_value
            invested_seen = True

        name = cell(row, name_col).strip() if name_col is not None else ""
        if not name:
            name = ticker

        positions.append(ParsedPosition(
            slice=ticker,
            name=name,
            invested_value=invested_value,
            current_value=None,
            result=None,
            quantity=qty,
        ))

    return ParsedPie(
        name=None,
        positions=tuple(positions),
        invested=(invested_total if invested_seen else None),
        value=None,
        result=None,
    )

View file

@ -152,3 +152,71 @@ def test_validate_mapping_non_numeric_qty_raises():
mapping = {"ticker_col": "Symbol", "qty_col": "Description"}
with pytest.raises(LLMParseError, match="numeric"):
_validate_mapping(mapping, headers, first_row)
def test_apply_mapping_builds_parsed_pie():
    """A fully mapped two-row export yields a ``ParsedPie`` with two positions."""
    from app.services.csv_import import ParsedPie, ParsedPosition
    from app.services.llm_csv_parser import _apply_mapping

    column_names = ["Symbol", "Quantity", "Avg Price", "Currency", "Description"]
    rows = [
        ["AAPL", "100", "150.25", "USD", "Apple Inc"],
        ["MSFT", "50", "310.00", "USD", "Microsoft Corp"],
    ]
    column_map = dict(
        ticker_col="Symbol",
        qty_col="Quantity",
        cost_col="Avg Price",
        currency_col="Currency",
        name_col="Description",
    )

    parsed = _apply_mapping(column_names, rows, column_map)

    assert isinstance(parsed, ParsedPie)
    assert len(parsed.positions) == 2

    apple = parsed.positions[0]
    assert isinstance(apple, ParsedPosition)
    assert apple.slice == "AAPL"
    assert apple.name == "Apple Inc"
    assert apple.quantity == 100.0
    # Per-position invested value is qty * avg_cost: 100 * 150.25 = 15025.
    assert apple.invested_value == pytest.approx(15025.0)
    # The pie total adds the MSFT row (50 * 310.00) on top of that.
    assert parsed.invested == pytest.approx(15025.0 + 50 * 310.00)
def test_apply_mapping_handles_missing_optional_columns():
    """Optional columns mapped to ``None`` leave cost unset and reuse the ticker as name."""
    from app.services.llm_csv_parser import _apply_mapping

    column_map = dict(
        ticker_col="Symbol",
        qty_col="Quantity",
        cost_col=None,
        currency_col=None,
        name_col=None,
    )

    parsed = _apply_mapping(["Symbol", "Quantity"], [["AAPL", "100"]], column_map)

    only = parsed.positions[0]
    assert only.slice == "AAPL"
    assert only.quantity == 100.0
    assert only.invested_value is None
    # With no name column, the position name falls back to the ticker.
    assert only.name == "AAPL"
def test_apply_mapping_skips_blank_and_unparseable_rows():
    """Blank rows and rows with a non-numeric quantity are dropped from the result."""
    from app.services.llm_csv_parser import _apply_mapping

    column_names = ["Symbol", "Quantity"]
    rows = [
        ["AAPL", "100"],
        ["", ""],  # blank row
        ["MSFT", "not-a-number"],  # quantity cannot be parsed
        ["NVDA", "40"],
    ]
    column_map = {"ticker_col": "Symbol", "qty_col": "Quantity"}

    parsed = _apply_mapping(column_names, rows, column_map)

    kept = [position.slice for position in parsed.positions]
    assert kept == ["AAPL", "NVDA"]