phase B (1/4): CSV parser + InstrumentMap (T212 shortcode → Yahoo ticker)

First two slices of the multi-user roadmap (Phase B). Validates the
core onboarding mechanic against the user's real T212 export before
paying any auth/tenancy tax.

CSV parser (app/services/csv_import.py):
  - Header-name matched (survives T212 reordering columns between
    exports), tolerant of UTF-8 BOM, dash/N/A/empty markers, thousand-
    separator commas, blank rows, zero-quantity stubs, missing Total row.
  - Returns ParsedPie(name, positions, invested, value, result) with
    derived avg_price + current_price per share in account currency.
  - 14 tests: 3 happy-path checks on the real CSV + 11 edge cases.

InstrumentMap (migration 0006 + app/services/instrument_map.py):
  - Catalogue table mapping T212 ticker → Yahoo ticker, populated by
    sync_from_t212() against the dev's read-only API key. Manual rows
    (manual=True) are protected from auto-overwrite.
  - Pure t212_ticker_to_yahoo() handles both suffix forms: single
    trailing exchange letter (l/a/p/d/m/s/...) and country code (US,
    DE, FR, IT, CA, ...). All 13 of the user's holdings + 15 case-
    coverage tests pass.
  - Live sync against T212 ingests 17,050 instruments (~2.2% unmappable
    on exotic exchanges; can extend the suffix map later).
  - resolve_slice() picks the right listing per shortName using a
    UK-friendly currency preference (GBX > GBP > EUR > USD). Resolved
    correctly for all 13 of the user's positions, including TTE on
    Paris vs the NYSE dual-listing.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Giorgio Gilestro 2026-05-16 10:53:08 +01:00
parent 6dac8a2c7f
commit 16e9f5f0cc
7 changed files with 840 additions and 0 deletions

182
tests/test_csv_import.py Normal file
View file

@ -0,0 +1,182 @@
"""Tests for app.services.csv_import.
Uses the real T212 pie-export sample at tests/fixtures/t212_pie_export.csv
for the happy path, then synthetic fixtures for edge cases.
"""
from __future__ import annotations
from pathlib import Path
import pytest
from app.services.csv_import import (
CSVImportError,
ParsedPie,
ParsedPosition,
parse_t212_csv,
)
# Real T212 pie-export sample, located relative to this test module so the
# suite does not depend on the current working directory.
FIXTURE = Path(__file__).parent / "fixtures" / "t212_pie_export.csv"
# --- Happy path: real T212 export ------------------------------------------
def test_parses_real_t212_export():
    """Parsing the real export yields a ParsedPie with the expected name,
    pie-level aggregates, and 13 positions."""
    raw_export = FIXTURE.read_bytes()
    parsed = parse_t212_csv(raw_export)

    assert isinstance(parsed, ParsedPie)
    assert parsed.name == "Defensive Ex-US 2026"

    # Pie-level aggregates; approx() because they are floats.
    assert parsed.invested == pytest.approx(11616.75)
    assert parsed.value == pytest.approx(11619.33)
    assert parsed.result == pytest.approx(2.58)

    assert len(parsed.positions) == 13
def test_first_position_fields_resolved():
    """Every field of the SGLN position in the real export is populated as
    expected, including the derived per-share prices."""
    parsed = parse_t212_csv(FIXTURE.read_bytes())
    position = next(
        candidate for candidate in parsed.positions if candidate.slice == "SGLN"
    )

    assert position.name == "iShares Physical Gold"
    assert position.invested_value == pytest.approx(2325)
    assert position.current_value == pytest.approx(2324.3)
    assert position.result == pytest.approx(-0.7)
    assert position.quantity == pytest.approx(35.12084592)
    # The source file carries 'N/A' for this column.
    assert position.dividends_gained is None

    # Per-share prices are derived: value divided by owned quantity.
    assert position.average_price == pytest.approx(2325 / 35.12084592)
    assert position.current_price == pytest.approx(2324.3 / 35.12084592)
def test_total_row_excluded_from_positions():
    """No parsed position may carry the 'Total' slice label (any casing)."""
    parsed = parse_t212_csv(FIXTURE.read_bytes())
    assert all(position.slice.lower() != "total" for position in parsed.positions)
# --- Edge cases: defensive parsing -----------------------------------------
def test_empty_file_raises():
    """An empty payload is rejected with a CSVImportError mentioning 'Empty'."""
    empty_payload = ""
    with pytest.raises(CSVImportError, match="Empty"):
        parse_t212_csv(empty_payload)
def test_missing_required_column_raises():
    """A header lacking required columns is rejected with CSVImportError."""
    # The header has Name and Value only: no Slice, no quantity column.
    payload = '"Name","Value"\n"iShares","100"\n'
    with pytest.raises(CSVImportError, match="missing required column"):
        parse_t212_csv(payload)
def test_no_position_rows_raises():
    """A file whose only data row is the Total aggregate is rejected."""
    header = '"Slice","Name","Invested value","Value","Result","Owned quantity"\n'
    # The only data row is the aggregate; there is no actual holding.
    total_row = '"Total","Empty Pie",0,0,0,"-"\n'
    with pytest.raises(CSVImportError, match="no parseable position"):
        parse_t212_csv(header + total_row)
def test_reordered_columns():
    """Columns are matched by header name, so a shuffled column order (which
    T212 sometimes produces between exports) must parse identically."""
    header = '"Name","Owned quantity","Slice","Value","Invested value","Result"\n'
    rows = (
        '"Shell","10.5","SHEL","350","340","10"\n'
        '"Total","-","Total","350","340","10"\n'
    )
    parsed = parse_t212_csv(header + rows)

    assert len(parsed.positions) == 1
    shell = parsed.positions[0]
    assert shell.slice == "SHEL"
    assert shell.name == "Shell"
    assert shell.quantity == pytest.approx(10.5)
    assert shell.invested_value == pytest.approx(340)
def test_unknown_columns_silently_ignored():
    """An unrecognised extra column does not disturb parsing of known ones."""
    header = (
        '"Slice","Name","Owned quantity","Invested value",'
        '"Something T212 added later","Value"\n'
    )
    rows = (
        '"SHEL","Shell","10","100","ignore me","105"\n'
        '"Total","-","-","100","-","105"\n'
    )
    parsed = parse_t212_csv(header + rows)

    first = parsed.positions[0]
    assert first.slice == "SHEL"
    # "Value" sits after the unknown column and must still be picked up.
    assert first.current_value == pytest.approx(105)
def test_handles_utf8_bom():
    """Excel-saved CSVs often have a UTF-8 BOM. Should not break header
    matching for the first column."""
    # Spell the BOM as an escape sequence: a raw U+FEFF character in the
    # source is invisible and easily dropped by editors, formatters, or
    # copy/paste, which would silently turn this into a plain no-BOM test.
    csv = (
        "\ufeff"
        '"Slice","Name","Invested value","Value","Result","Owned quantity"\n'
        '"SHEL","Shell",100,105,5,"10"\n'
        '"Total","Test",100,105,5,"-"\n'
    ).encode("utf-8")
    # Guard the payload itself so the test cannot degrade unnoticed.
    assert csv.startswith(b"\xef\xbb\xbf")

    pie = parse_t212_csv(csv)
    # "Slice" is the first header cell, i.e. the one a BOM would corrupt.
    assert pie.positions[0].slice == "SHEL"
    assert pie.name == "Test"
def test_dash_and_na_become_none():
    """Placeholder cells ('-', empty, 'N/A', 'n/a') are parsed as None."""
    header = (
        '"Slice","Name","Invested value","Value","Result","Owned quantity",'
        '"Dividends gained","Dividends cash","Dividends reinvested"\n'
    )
    rows = (
        '"SHEL","Shell",100,"-",,"10","N/A","","n/a"\n'
        '"Total","P",100,100,0,"-","0","0","0"\n'
    )
    parsed = parse_t212_csv(header + rows)
    position = parsed.positions[0]

    assert position.current_value is None  # "-"
    assert position.result is None  # unquoted empty cell
    assert position.dividends_gained is None  # "N/A"
    assert position.dividends_cash is None  # quoted empty cell
    assert position.dividends_reinvested is None  # "n/a"
def test_thousand_separator_commas_tolerated():
    """Quoted numbers with thousand-separator commas parse as plain floats,
    both on a position row and on the Total row."""
    header = '"Slice","Name","Invested value","Value","Result","Owned quantity"\n'
    rows = (
        '"BIG","Mega Holding","1,234,567.89","1,250,000",15432.11,"100"\n'
        '"Total","Big Pie","1,234,567.89","1,250,000",15432.11,"-"\n'
    )
    parsed = parse_t212_csv(header + rows)

    assert parsed.positions[0].invested_value == pytest.approx(1234567.89)
    assert parsed.invested == pytest.approx(1234567.89)
def test_blank_rows_skipped():
    """Empty and whitespace-only lines between rows are ignored."""
    header = '"Slice","Name","Invested value","Value","Result","Owned quantity"\n'
    rows = (
        '\n'
        '"SHEL","Shell",100,105,5,"10"\n'
        ' \n'
        '"Total","P",100,105,5,"-"\n'
    )
    parsed = parse_t212_csv(header + rows)
    assert len(parsed.positions) == 1
def test_zero_quantity_skipped():
    """A row with quantity=0 is not a real holding (likely a stub left behind
    by a fully-sold position): it is skipped rather than raising."""
    header = '"Slice","Name","Invested value","Value","Result","Owned quantity"\n'
    rows = (
        '"GONE","Sold Out",0,0,0,"0"\n'
        '"SHEL","Shell",100,105,5,"10"\n'
        '"Total","P",100,105,5,"-"\n'
    )
    parsed = parse_t212_csv(header + rows)

    kept_slices = [position.slice for position in parsed.positions]
    assert kept_slices == ["SHEL"]
def test_position_without_total_row_still_parses():
    """Without a Total row the position still parses, while the pie-level
    name and invested amount are left as None."""
    header = '"Slice","Name","Invested value","Value","Result","Owned quantity"\n'
    only_row = '"SHEL","Shell",100,105,5,"10"\n'
    parsed = parse_t212_csv(header + only_row)

    assert len(parsed.positions) == 1
    assert parsed.name is None
    assert parsed.invested is None