phase B (1/4): CSV parser + InstrumentMap (T212 shortcode → Yahoo ticker)
First two slices of the multi-user roadmap (Phase B). Validates the
core onboarding mechanic against the user's real T212 export before
paying any auth/tenancy tax.
CSV parser (app/services/csv_import.py):
- Header-name matched (survives T212 reordering columns between
exports), tolerant of UTF-8 BOM, dash/N/A/empty markers, thousand-
separator commas, blank rows, zero-quantity stubs, missing Total row.
- Returns ParsedPie(name, positions, invested, value, result) with
derived avg_price + current_price per share in account currency.
- 14 tests covering happy path on the real CSV + 13 edge cases.
InstrumentMap (migration 0006 + app/services/instrument_map.py):
- Catalogue table mapping T212 ticker → Yahoo ticker, populated by
sync_from_t212() against the dev's read-only API key. Manual rows
(manual=True) are protected from auto-overwrite.
- Pure t212_ticker_to_yahoo() handles both suffix forms: single
trailing exchange letter (l/a/p/d/m/s/...) and country code (US,
DE, FR, IT, CA, ...). All 13 of the user's holdings + 15 case-
coverage tests pass.
- Live sync against T212 ingests 17,050 instruments (~2.2% unmappable
on exotic exchanges; can extend the suffix map later).
- resolve_slice() picks the right listing per shortName using a
UK-friendly currency preference (GBX > GBP > EUR > USD). Resolved
correctly for all 13 of the user's positions, including TTE on
Paris vs the NYSE dual-listing.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
6dac8a2c7f
commit
16e9f5f0cc
7 changed files with 840 additions and 0 deletions
199
app/services/csv_import.py
Normal file
199
app/services/csv_import.py
Normal file
|
|
@ -0,0 +1,199 @@
|
|||
"""Defensive parser for Trading 212 pie-export CSVs.
|
||||
|
||||
T212 has changed column order between exports historically; matching on header
|
||||
NAME rather than column index makes this robust. We also explicitly skip the
|
||||
'Total' aggregate row (it has slice='Total' and quantity='-').
|
||||
|
||||
Pure function — no DB, no HTTP. Persisting into PortfolioSnapshot/Position is
|
||||
done by the upload endpoint after mapping each Slice to a Yahoo ticker via the
|
||||
InstrumentMap service.
|
||||
"""
|
||||
from __future__ import annotations

import csv
import io
import math
from dataclasses import dataclass
|
||||
|
||||
|
||||
class CSVImportError(ValueError):
    """Signal that an uploaded CSV cannot be imported.

    Used when the file cannot be parsed or lacks a required column. Because
    it subclasses ``ValueError``, handlers written for ``ValueError`` catch
    it as well.
    """
|
||||
|
||||
|
||||
# Header name -> normalised key used in the parsed dict. Lowercase, ignore
|
||||
# leading/trailing whitespace, treat case-insensitively. Extra columns are
|
||||
# silently ignored.
|
||||
_HEADER_MAP = {
|
||||
"slice": "slice",
|
||||
"name": "name",
|
||||
"invested value": "invested_value",
|
||||
"value": "current_value",
|
||||
"result": "result",
|
||||
"owned quantity": "quantity",
|
||||
"dividends gained": "dividends_gained",
|
||||
"dividends cash": "dividends_cash",
|
||||
"dividends reinvested": "dividends_reinvested",
|
||||
}
|
||||
|
||||
# These must be present for the import to be meaningful at all.
|
||||
_REQUIRED_FIELDS = ("slice", "quantity")
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ParsedPosition:
    """One holding row of a pie export, with per-unit prices derived on demand."""

    slice: str  # T212 shortcode, e.g. "SGLN"
    name: str
    invested_value: float | None
    current_value: float | None
    result: float | None  # profit/loss in the pie's currency
    quantity: float
    dividends_gained: float | None = None
    dividends_cash: float | None = None
    dividends_reinvested: float | None = None

    @property
    def average_price(self) -> float | None:
        """Invested value per unit held; None if either input is missing or zero-quantity."""
        if self.quantity and self.invested_value is not None:
            return self.invested_value / self.quantity
        return None

    @property
    def current_price(self) -> float | None:
        """Current value per unit held; None if either input is missing or zero-quantity."""
        if self.quantity and self.current_value is not None:
            return self.current_value / self.quantity
        return None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ParsedPie:
    """Result of parsing one pie export: the positions plus optional totals."""

    name: str | None  # Name column of the Total row
    positions: tuple[ParsedPosition, ...]
    # The three figures below are taken from the Total row.
    invested: float | None
    value: float | None
    result: float | None
|
||||
|
||||
|
||||
def _normalise_header(h: str) -> str:
|
||||
return h.strip().lower()
|
||||
|
||||
|
||||
def _parse_num(raw: str | None) -> float | None:
|
||||
"""Empty / 'N/A' / '-' / '—' → None. Otherwise float."""
|
||||
if raw is None:
|
||||
return None
|
||||
s = raw.strip()
|
||||
if not s or s in {"-", "—", "N/A", "n/a", "NA"}:
|
||||
return None
|
||||
# T212 occasionally exports with thousand-comma. Strip safely.
|
||||
s = s.replace(",", "")
|
||||
try:
|
||||
return float(s)
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
|
||||
def _read_csv_rows(content: str | bytes) -> list[list[str]]:
    """Decode the upload and return all CSV rows, raising CSVImportError on reader failure."""
    if isinstance(content, bytes):
        try:
            content = content.decode("utf-8-sig")  # also drops a leading BOM
        except UnicodeDecodeError:
            # latin-1 maps every byte to a character, so this cannot fail.
            content = content.decode("latin-1")
    # Text decoded by the caller as plain UTF-8 still carries the BOM, which
    # would otherwise become part of the first header name.
    content = content.lstrip("\ufeff")
    try:
        # newline="" is what the csv module asks for, so that bare-CR line
        # endings and newlines inside quoted fields are handled by the reader.
        return list(csv.reader(io.StringIO(content, newline="")))
    except csv.Error as exc:
        raise CSVImportError(f"Malformed CSV: {exc}") from exc


def _row_has_text(row: list[str]) -> bool:
    """Return True when at least one cell of the row is non-blank."""
    return any(cell.strip() for cell in row)


def _split_row(
    row: list[str], field_by_index: dict[int, str]
) -> tuple[dict[str, str], dict[str, float | None]]:
    """Extract the mapped cells of one row as (text fields, numeric fields)."""
    text: dict[str, str] = {}
    numbers: dict[str, float | None] = {}
    for idx, field in field_by_index.items():
        raw = row[idx] if idx < len(row) else ""  # short rows read as empty cells
        if field in ("slice", "name"):
            text[field] = raw.strip()
        else:
            numbers[field] = _parse_num(raw)
    return text, numbers


def parse_t212_csv(content: str | bytes) -> ParsedPie:
    """Parse a T212 pie-export CSV.

    Columns are located by header name, not position. Blank rows, rows
    without a Slice code and rows whose quantity is missing or zero are
    skipped. A row whose Slice is 'Total' (any letter case) supplies the pie
    name and aggregate figures instead of a position; if several such rows
    exist, the last one wins.

    Args:
        content: bytes or str containing the CSV (raw export file contents).
            Bytes are decoded as UTF-8 (with or without BOM), falling back
            to latin-1. A leading BOM in str input is ignored.

    Returns:
        ParsedPie with the positions and, when a Total row is present, the
        aggregate totals (None otherwise).

    Raises:
        CSVImportError: if the file is empty, cannot be read as CSV, is
            missing required headers, or contains no usable rows.
    """
    rows = _read_csv_rows(content)

    # The header is the first row that contains any text, so blank lines
    # ahead of it are tolerated just like blank lines between data rows.
    header_at = next((i for i, row in enumerate(rows) if _row_has_text(row)), None)
    if header_at is None:
        raise CSVImportError("Empty CSV file")
    header_row = rows[header_at]

    # Map column index -> normalised field name. Unknown headers are ignored.
    field_by_index: dict[int, str] = {}
    for i, h in enumerate(header_row):
        key = _HEADER_MAP.get(_normalise_header(h))
        if key:
            field_by_index[i] = key

    present = set(field_by_index.values())
    missing = [f for f in _REQUIRED_FIELDS if f not in present]
    if missing:
        raise CSVImportError(
            f"CSV missing required column(s): {', '.join(missing)}. "
            f"Found headers: {header_row}"
        )

    positions: list[ParsedPosition] = []
    totals: dict[str, float | None] = {}
    pie_name: str | None = None

    for row in rows[header_at + 1:]:
        if not _row_has_text(row):
            continue  # skip blank lines

        text, numbers = _split_row(row, field_by_index)
        slice_code = text.get("slice", "")
        if not slice_code:
            continue  # malformed; skip silently rather than abort

        # The 'Total' row carries the aggregates and the pie name; it is not
        # a position in its own right.
        if slice_code.lower() == "total":
            pie_name = text.get("name") or None
            totals = numbers
            continue

        qty = numbers.get("quantity")
        if qty is None or qty == 0:
            # Position row with no usable quantity — skip rather than fail.
            continue

        positions.append(
            ParsedPosition(
                slice=slice_code,
                name=text.get("name", ""),
                invested_value=numbers.get("invested_value"),
                current_value=numbers.get("current_value"),
                result=numbers.get("result"),
                quantity=qty,
                dividends_gained=numbers.get("dividends_gained"),
                dividends_cash=numbers.get("dividends_cash"),
                dividends_reinvested=numbers.get("dividends_reinvested"),
            )
        )

    if not positions:
        raise CSVImportError(
            "CSV contained no parseable position rows. "
            "Expected at least one row with a Slice code and quantity."
        )

    return ParsedPie(
        name=pie_name,
        positions=tuple(positions),
        invested=totals.get("invested_value"),
        value=totals.get("current_value"),
        result=totals.get("result"),
    )
|
||||
Loading…
Add table
Add a link
Reference in a new issue