csv-parser: add _apply_mapping helper
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
f44b77df6f
commit
b99f46d2fc
2 changed files with 146 additions and 1 deletions
|
|
@ -21,7 +21,7 @@ import csv
|
||||||
import hashlib
|
import hashlib
|
||||||
import io
|
import io
|
||||||
|
|
||||||
from app.services.csv_import import CSVImportError
|
from app.services.csv_import import CSVImportError, ParsedPie, ParsedPosition
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# Module-level constants
|
# Module-level constants
|
||||||
|
|
@ -174,3 +174,80 @@ def _validate_mapping(
|
||||||
raise LLMParseError(
|
raise LLMParseError(
|
||||||
f"LLM mapping cost_col={cost_col!r} maps to non-numeric value {cost_value!r}"
|
f"LLM mapping cost_col={cost_col!r} maps to non-numeric value {cost_value!r}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_number(value: str) -> float | None:
|
||||||
|
"""Permissive float parse: strips thousands separators, currency
|
||||||
|
symbols, percent signs. Returns None on failure (so callers can
|
||||||
|
decide whether to skip or raise)."""
|
||||||
|
s = value.strip().replace(",", "").replace("$", "")
|
||||||
|
s = s.replace("€", "").replace("£", "").replace("%", "")
|
||||||
|
if not s:
|
||||||
|
return None
|
||||||
|
try:
|
||||||
|
return float(s)
|
||||||
|
except ValueError:
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _apply_mapping(
    headers: list[str],
    data_rows: list[list[str]],
    mapping: dict,
) -> ParsedPie:
    """Convert ``data_rows`` into a ``ParsedPie`` using a column mapping.

    ``mapping`` must contain ``ticker_col`` and ``qty_col``; ``name_col``
    and ``cost_col`` are optional and may be absent or ``None``. Each value
    is a header name that is looked up in ``headers``.

    A row is skipped without error when it is entirely blank, when its
    ticker cell is empty, or when its quantity does not parse to a number
    greater than zero. Rows shorter than the header list are treated as
    having empty cells for the missing columns.

    Per position, ``invested_value`` is ``quantity * cost`` when a cost
    parses, otherwise ``None``. The pie-level ``invested`` is the sum of
    those values, or ``None`` if no row had a cost. The position name falls
    back to the upper-cased ticker when no name is available.
    """
    column_of = {header: pos for pos, header in enumerate(headers)}
    ticker_col = mapping["ticker_col"]
    qty_col = mapping["qty_col"]
    name_col = mapping.get("name_col")
    cost_col = mapping.get("cost_col")

    def cell(row: list[str], column: str) -> str:
        # Short rows yield "" for columns they do not reach.
        pos = column_of[column]
        return row[pos] if pos < len(row) else ""

    collected: list[ParsedPosition] = []
    cost_basis_sum = 0.0
    any_cost_seen = False

    for row in data_rows:
        if all(not entry.strip() for entry in row):
            continue

        symbol = cell(row, ticker_col).strip().upper()
        if not symbol:
            continue

        quantity = _parse_number(cell(row, qty_col))
        if quantity is None or quantity <= 0:
            continue

        # An empty cell parses to None, so a short row means "no cost".
        unit_cost = (
            None if cost_col is None else _parse_number(cell(row, cost_col))
        )
        if unit_cost is None:
            row_invested = None
        else:
            row_invested = quantity * unit_cost
            cost_basis_sum += row_invested
            any_cost_seen = True

        label = "" if name_col is None else cell(row, name_col).strip()

        collected.append(
            ParsedPosition(
                slice=symbol,
                name=label or symbol,
                invested_value=row_invested,
                current_value=None,
                result=None,
                quantity=quantity,
            )
        )

    pie_invested = cost_basis_sum if any_cost_seen else None
    return ParsedPie(
        name=None,
        positions=tuple(collected),
        invested=pie_invested,
        value=None,
        result=None,
    )
|
||||||
|
|
|
||||||
|
|
@ -152,3 +152,71 @@ def test_validate_mapping_non_numeric_qty_raises():
|
||||||
mapping = {"ticker_col": "Symbol", "qty_col": "Description"}
|
mapping = {"ticker_col": "Symbol", "qty_col": "Description"}
|
||||||
with pytest.raises(LLMParseError, match="numeric"):
|
with pytest.raises(LLMParseError, match="numeric"):
|
||||||
_validate_mapping(mapping, headers, first_row)
|
_validate_mapping(mapping, headers, first_row)
|
||||||
|
|
||||||
|
|
||||||
|
def test_apply_mapping_builds_parsed_pie():
    """A fully mapped two-row export yields two positions and a summed cost basis."""
    from app.services.csv_import import ParsedPie, ParsedPosition
    from app.services.llm_csv_parser import _apply_mapping

    columns = ["Symbol", "Quantity", "Avg Price", "Currency", "Description"]
    apple_row = ["AAPL", "100", "150.25", "USD", "Apple Inc"]
    microsoft_row = ["MSFT", "50", "310.00", "USD", "Microsoft Corp"]
    column_map = dict(
        ticker_col="Symbol",
        qty_col="Quantity",
        cost_col="Avg Price",
        currency_col="Currency",
        name_col="Description",
    )

    parsed = _apply_mapping(columns, [apple_row, microsoft_row], column_map)

    assert isinstance(parsed, ParsedPie)
    assert len(parsed.positions) == 2

    first = parsed.positions[0]
    assert isinstance(first, ParsedPosition)
    assert first.slice == "AAPL"
    assert first.name == "Apple Inc"
    assert first.quantity == 100.0
    # Per-position invested value is qty * avg_cost: 100 * 150.25 = 15025.
    assert first.invested_value == pytest.approx(15025.0)
    # The pie total adds the MSFT row on top of that.
    assert parsed.invested == pytest.approx(15025.0 + 50 * 310.00)
|
||||||
|
|
||||||
|
|
||||||
|
def test_apply_mapping_handles_missing_optional_columns():
    """Optional columns mapped to None leave the cost empty and reuse the ticker as name."""
    from app.services.llm_csv_parser import _apply_mapping

    column_map = dict(
        ticker_col="Symbol",
        qty_col="Quantity",
        cost_col=None,
        currency_col=None,
        name_col=None,
    )

    parsed = _apply_mapping(["Symbol", "Quantity"], [["AAPL", "100"]], column_map)

    only = parsed.positions[0]
    assert only.slice == "AAPL"
    assert only.quantity == 100.0
    assert only.invested_value is None
    # With no name column, the ticker doubles as the display name.
    assert only.name == "AAPL"
|
||||||
|
|
||||||
|
|
||||||
|
def test_apply_mapping_skips_blank_and_unparseable_rows():
    """Blank rows and rows with a non-numeric quantity are dropped, preserving order."""
    from app.services.llm_csv_parser import _apply_mapping

    columns = ["Symbol", "Quantity"]
    good_first = ["AAPL", "100"]
    blank = ["", ""]
    bad_quantity = ["MSFT", "not-a-number"]
    good_last = ["NVDA", "40"]
    column_map = dict(ticker_col="Symbol", qty_col="Quantity")

    parsed = _apply_mapping(
        columns,
        [good_first, blank, bad_quantity, good_last],
        column_map,
    )

    kept_tickers = [position.slice for position in parsed.positions]
    assert kept_tickers == ["AAPL", "NVDA"]
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue