From b99f46d2fc720405588d25b6debf0479ae75584e Mon Sep 17 00:00:00 2001
From: Giorgio Gilestro
Date: Wed, 27 May 2026 12:18:31 +0200
Subject: [PATCH] csv-parser: add _apply_mapping helper

Add _parse_number and _apply_mapping, which turn CSV rows plus an
LLM-produced column mapping into a ParsedPie. Values that parse to NaN
or an infinity are treated as unparseable so they can never become a
position quantity or leak into the invested total.

Co-Authored-By: Claude Opus 4.7
---
 app/services/llm_csv_parser.py | 104 ++++++++++++++++++++++++++++++++-
 tests/test_llm_csv_parser.py   |  93 +++++++++++++++++++++++++++++
 2 files changed, 196 insertions(+), 1 deletion(-)

diff --git a/app/services/llm_csv_parser.py b/app/services/llm_csv_parser.py
index ee891fc..44cfad1 100644
--- a/app/services/llm_csv_parser.py
+++ b/app/services/llm_csv_parser.py
@@ -21,7 +21,8 @@ import csv
 import hashlib
 import io
+import math
 
-from app.services.csv_import import CSVImportError
+from app.services.csv_import import CSVImportError, ParsedPie, ParsedPosition
 
 # ---------------------------------------------------------------------------
 # Module-level constants
@@ -174,3 +175,104 @@ def _validate_mapping(
             raise LLMParseError(
                 f"LLM mapping cost_col={cost_col!r} maps to non-numeric value {cost_value!r}"
             )
+
+
+def _parse_number(value: str) -> float | None:
+    """Permissively parse a spreadsheet-style number.
+
+    Strips surrounding whitespace, thousands separators (``,``), the
+    currency symbols ``$``, ``€`` and ``£``, and percent signs, then
+    hands the remainder to ``float``.
+
+    Returns None when nothing is left, when the remainder is not a
+    valid float, or when it parses to NaN or an infinity (so callers
+    can decide whether to skip or raise). ``float`` accepts literals
+    such as ``"nan"`` and ``"inf"``, and NaN would slip past a
+    ``<= 0`` guard because ordering comparisons with NaN are false.
+    """
+    s = value.strip().replace(",", "").replace("$", "")
+    s = s.replace("€", "").replace("£", "").replace("%", "")
+    if not s:
+        return None
+    try:
+        number = float(s)
+    except ValueError:
+        return None
+    return number if math.isfinite(number) else None
+
+
+def _apply_mapping(
+    headers: list[str],
+    data_rows: list[list[str]],
+    mapping: dict,
+) -> ParsedPie:
+    """Iterate ``data_rows`` and produce a ``ParsedPie``.
+
+    Rows that are entirely blank, have an empty ticker, or lack a usable
+    quantity (blank, non-numeric, non-finite, zero or negative) are
+    silently skipped — broker exports often include summary or
+    placeholder rows after the position list. Tickers are stripped and
+    upper-cased. ``name_col`` falls back to the ticker symbol when null
+    or when the cell is empty.
+
+    ``invested_value`` is ``quantity * average cost`` when ``cost_col``
+    is mapped and the cell parses; the pie's ``invested`` is the sum of
+    those values, or None when no row had a usable cost. Other mapping
+    keys (e.g. ``currency_col``) are not read here.
+
+    Raises KeyError if ``ticker_col``/``qty_col`` are missing from
+    ``mapping``, or when a row needs a mapped column that is not in
+    ``headers``.
+    """
+    # Header name -> column index; with duplicate names the last one wins.
+    idx = {h: i for i, h in enumerate(headers)}
+    ticker_col = mapping["ticker_col"]
+    qty_col = mapping["qty_col"]
+    name_col = mapping.get("name_col")
+    cost_col = mapping.get("cost_col")
+
+    positions: list[ParsedPosition] = []
+    invested_total = 0.0
+    invested_seen = False
+
+    for row in data_rows:
+        if not any(c.strip() for c in row):
+            continue
+        # Short rows are treated as having empty cells past their end.
+        ticker_raw = row[idx[ticker_col]] if idx[ticker_col] < len(row) else ""
+        ticker = ticker_raw.strip().upper()
+        if not ticker:
+            continue
+        qty_raw = row[idx[qty_col]] if idx[qty_col] < len(row) else ""
+        qty = _parse_number(qty_raw)
+        if qty is None or qty <= 0:
+            continue
+        avg_cost: float | None = None
+        if cost_col is not None and idx[cost_col] < len(row):
+            avg_cost = _parse_number(row[idx[cost_col]])
+        invested_value: float | None = None
+        if avg_cost is not None:
+            invested_value = qty * avg_cost
+            invested_total += invested_value
+            invested_seen = True
+        name = ""
+        if name_col is not None and idx[name_col] < len(row):
+            name = row[idx[name_col]].strip()
+        if not name:
+            name = ticker
+        positions.append(ParsedPosition(
+            slice=ticker,
+            name=name,
+            invested_value=invested_value,
+            current_value=None,
+            result=None,
+            quantity=qty,
+        ))
+
+    return ParsedPie(
+        name=None,
+        positions=tuple(positions),
+        invested=(invested_total if invested_seen else None),
+        value=None,
+        result=None,
+    )
diff --git a/tests/test_llm_csv_parser.py b/tests/test_llm_csv_parser.py
index db75bf4..c8a55cc 100644
--- a/tests/test_llm_csv_parser.py
+++ b/tests/test_llm_csv_parser.py
@@ -152,3 +152,96 @@ def test_validate_mapping_non_numeric_qty_raises():
     mapping = {"ticker_col": "Symbol", "qty_col": "Description"}
     with pytest.raises(LLMParseError, match="numeric"):
         _validate_mapping(mapping, headers, first_row)
+
+
+def test_apply_mapping_builds_parsed_pie():
+    from app.services.csv_import import ParsedPie, ParsedPosition
+    from app.services.llm_csv_parser import _apply_mapping
+
+    headers = ["Symbol", "Quantity", "Avg Price", "Currency", "Description"]
+    data_rows = [
+        ["AAPL", "100", "150.25", "USD", "Apple Inc"],
+        ["MSFT", "50", "310.00", "USD", "Microsoft Corp"],
+    ]
+    mapping = {
+        "ticker_col": "Symbol",
+        "qty_col": "Quantity",
+        "cost_col": "Avg Price",
+        "currency_col": "Currency",
+        "name_col": "Description",
+    }
+
+    pie = _apply_mapping(headers, data_rows, mapping)
+
+    assert isinstance(pie, ParsedPie)
+    assert len(pie.positions) == 2
+    p0 = pie.positions[0]
+    assert isinstance(p0, ParsedPosition)
+    assert p0.slice == "AAPL"
+    assert p0.name == "Apple Inc"
+    assert p0.quantity == 100.0
+    # invested_value = qty * avg_cost = 100 * 150.25 = 15025
+    assert p0.invested_value == pytest.approx(15025.0)
+    assert pie.invested == pytest.approx(15025.0 + 50 * 310.00)
+
+
+def test_apply_mapping_handles_missing_optional_columns():
+    from app.services.llm_csv_parser import _apply_mapping
+
+    headers = ["Symbol", "Quantity"]
+    data_rows = [["AAPL", "100"]]
+    mapping = {
+        "ticker_col": "Symbol",
+        "qty_col": "Quantity",
+        "cost_col": None,
+        "currency_col": None,
+        "name_col": None,
+    }
+
+    pie = _apply_mapping(headers, data_rows, mapping)
+    p = pie.positions[0]
+    assert p.slice == "AAPL"
+    assert p.quantity == 100.0
+    assert p.invested_value is None
+    assert p.name == "AAPL"  # falls back to ticker when name_col absent
+
+
+def test_apply_mapping_skips_blank_and_unparseable_rows():
+    from app.services.llm_csv_parser import _apply_mapping
+
+    headers = ["Symbol", "Quantity"]
+    data_rows = [
+        ["AAPL", "100"],
+        ["", ""],  # blank
+        ["MSFT", "not-a-number"],  # bad qty
+        ["NVDA", "40"],
+    ]
+    mapping = {"ticker_col": "Symbol", "qty_col": "Quantity"}
+
+    pie = _apply_mapping(headers, data_rows, mapping)
+    assert [p.slice for p in pie.positions] == ["AAPL", "NVDA"]
+
+
+def test_apply_mapping_skips_non_finite_quantities():
+    from app.services.llm_csv_parser import _apply_mapping
+
+    headers = ["Symbol", "Quantity"]
+    data_rows = [
+        ["AAPL", "100"],
+        ["MSFT", "nan"],  # float() accepts these literals
+        ["TSLA", "inf"],
+        ["NVDA", "40"],
+    ]
+    mapping = {"ticker_col": "Symbol", "qty_col": "Quantity"}
+
+    pie = _apply_mapping(headers, data_rows, mapping)
+    assert [p.slice for p in pie.positions] == ["AAPL", "NVDA"]
+
+
+def test_parse_number_rejects_non_finite_values():
+    from app.services.llm_csv_parser import _parse_number
+
+    assert _parse_number("$1,234.50") == pytest.approx(1234.5)
+    assert _parse_number("") is None
+    for raw in ("nan", "NaN", "inf", "-inf", "Infinity"):
+        assert _parse_number(raw) is None