222 lines
7.2 KiB
Python
222 lines
7.2 KiB
Python
"""Unit + integration tests for the LLM-fallback CSV parser."""
|
|
from __future__ import annotations
|
|
|
|
import pytest
|
|
|
|
|
|
def test_csv_format_template_model_columns():
    """CsvFormatTemplate declares every expected column and no user-attribution column.

    Also checks that ``fingerprint`` is declared unique and non-nullable.
    """
    from sqlalchemy import inspect

    from app.models import CsvFormatTemplate

    columns_by_name = {
        column.name: column for column in inspect(CsvFormatTemplate).columns
    }

    required_columns = (
        "fingerprint",
        "headers",
        "sample_row",
        "mapping",
        "preamble_rows",
        "delimiter",
        "broker_label",
        "first_seen_at",
        "use_count",
        "last_used_at",
        "llm_model",
        "llm_cost_usd",
    )
    for column_name in required_columns:
        assert column_name in columns_by_name

    # Templates must not carry any per-user attribution.
    for forbidden_name in ("user_id", "first_seen_user_id"):
        assert forbidden_name not in columns_by_name

    # The fingerprint acts as the cache key, so it has to be unique and present.
    fingerprint_column = columns_by_name["fingerprint"]
    assert fingerprint_column.unique is True
    assert fingerprint_column.nullable is False
|
|
|
|
|
|
def test_fingerprint_stable_across_case_and_whitespace():
    """Header case and surrounding whitespace do not change the fingerprint."""
    from app.services.llm_csv_parser import _fingerprint

    header_variants = (
        ["Symbol", "Quantity", "Avg Price"],
        ["symbol", "quantity", "avg price"],
        [" SYMBOL ", "Quantity", " AVG PRICE"],
    )
    # Fingerprints are computed in the listed order, one per variant.
    title_case, lower_case, padded_upper = [
        _fingerprint(variant) for variant in header_variants
    ]
    assert title_case == lower_case == padded_upper
|
|
|
|
|
|
def test_fingerprint_differs_for_different_columns():
    """A header list with an extra column produces a different fingerprint."""
    from app.services.llm_csv_parser import _fingerprint

    base_headers = ["Symbol", "Quantity"]
    two_column_print = _fingerprint(base_headers)
    three_column_print = _fingerprint(["Symbol", "Quantity", "Avg Price"])

    assert two_column_print != three_column_print
|
|
|
|
|
|
def test_fingerprint_is_sha256_hex_64_chars():
    """The fingerprint is 64 characters long and uses only lowercase hex digits."""
    from app.services.llm_csv_parser import _fingerprint

    lowercase_hex = "0123456789abcdef"
    digest = _fingerprint(["Symbol", "Quantity"])

    assert len(digest) == 64
    for character in digest:
        assert character in lowercase_hex
|
|
|
|
|
|
def test_detect_dialect_no_preamble_comma():
    """Comma-separated input whose first line is the header reports zero preamble rows."""
    from app.services.llm_csv_parser import _detect_dialect

    # Adjacent literals concatenate into the same single bytes payload.
    payload = (
        b"Symbol,Quantity,Avg Price\n"
        b"AAPL,100,150.25\n"
        b"MSFT,50,310.00\n"
    )

    detected_delimiter, preamble_count = _detect_dialect(payload)

    assert detected_delimiter == ","
    assert preamble_count == 0
|
|
|
|
|
|
def test_detect_dialect_with_preamble():
    """Four statement lines ahead of the real header are counted as preamble."""
    from app.services.llm_csv_parser import _detect_dialect

    statement_lines = [
        b"Statement,Header,Field Name,Field Value\n",
        b"Statement,Data,BrokerName,Interactive Brokers LLC\n",
        b"Statement,Data,Title,Activity Statement\n",
        b"Statement,Data,Period,\"January 1, 2026 - January 31, 2026\"\n",
    ]
    table_lines = [
        b"Symbol,Quantity,Avg Price,Currency,Description\n",
        b"AAPL,100,150.25,USD,Apple Inc\n",
    ]
    payload = b"".join(statement_lines + table_lines)

    detected_delimiter, preamble_count = _detect_dialect(payload)

    assert detected_delimiter == ","
    # The table header is the fifth line (index 4), so four lines precede it.
    assert preamble_count == 4
|
|
|
|
|
|
def test_detect_dialect_tab_delimited():
    """Tab-separated input is detected as tab-delimited with zero preamble rows."""
    from app.services.llm_csv_parser import _detect_dialect

    # Adjacent literals concatenate into the same single bytes payload.
    payload = (
        b"Symbol\tQuantity\tAvg Price\n"
        b"AAPL\t100\t150.25\n"
    )

    detected_delimiter, preamble_count = _detect_dialect(payload)

    assert detected_delimiter == "\t"
    assert preamble_count == 0
|
|
|
|
|
|
def test_detect_dialect_empty_raises():
    """Empty input is rejected with LLMParseError."""
    from app.services.llm_csv_parser import LLMParseError, _detect_dialect

    empty_payload = b""
    with pytest.raises(LLMParseError):
        _detect_dialect(empty_payload)
|
|
|
|
|
|
def test_validate_mapping_accepts_well_formed():
    """A mapping that names only existing headers validates without raising."""
    from app.services.llm_csv_parser import _validate_mapping

    # Header -> first-row value; insertion order fixes the column order.
    sample = {
        "Symbol": "AAPL",
        "Quantity": "100",
        "Avg Price": "150.25",
        "Currency": "USD",
    }
    headers = list(sample)
    first_row = list(sample.values())

    well_formed = {
        "ticker_col": "Symbol",
        "qty_col": "Quantity",
        "cost_col": "Avg Price",
        "currency_col": "Currency",
        "name_col": None,
    }

    # Passing means the call returns without raising.
    _validate_mapping(well_formed, headers, first_row)
|
|
|
|
|
|
def test_validate_mapping_missing_ticker_raises():
    """A mapping with no ticker column raises LLMParseError mentioning "ticker"."""
    from app.services.llm_csv_parser import LLMParseError, _validate_mapping

    headers, first_row = ["Symbol", "Quantity"], ["AAPL", "100"]
    mapping_without_ticker = {"ticker_col": None, "qty_col": "Quantity"}

    with pytest.raises(LLMParseError, match="ticker"):
        _validate_mapping(mapping_without_ticker, headers, first_row)
|
|
|
|
|
|
def test_validate_mapping_missing_qty_raises():
    """A mapping with no quantity column raises LLMParseError mentioning "qty"."""
    from app.services.llm_csv_parser import LLMParseError, _validate_mapping

    headers, first_row = ["Symbol", "Quantity"], ["AAPL", "100"]
    mapping_without_qty = {"ticker_col": "Symbol", "qty_col": None}

    with pytest.raises(LLMParseError, match="qty"):
        _validate_mapping(mapping_without_qty, headers, first_row)
|
|
|
|
|
|
def test_validate_mapping_unknown_column_raises():
    """A mapping that names a column absent from the headers is rejected.

    The raised LLMParseError is expected to mention the offending column name.
    """
    from app.services.llm_csv_parser import LLMParseError, _validate_mapping

    headers, first_row = ["Symbol", "Quantity"], ["AAPL", "100"]
    bogus_column = "NotARealColumn"
    mapping_with_unknown = {"ticker_col": "Symbol", "qty_col": bogus_column}

    with pytest.raises(LLMParseError, match=bogus_column):
        _validate_mapping(mapping_with_unknown, headers, first_row)
|
|
|
|
|
|
def test_validate_mapping_non_numeric_qty_raises():
    """A quantity column whose first-row value is not a number is rejected."""
    from app.services.llm_csv_parser import LLMParseError, _validate_mapping

    headers, first_row = ["Symbol", "Description"], ["AAPL", "Apple Inc"]
    # "Description" is mapped as the quantity, yet "Apple Inc" is not numeric.
    mapping_with_text_qty = {"ticker_col": "Symbol", "qty_col": "Description"}

    with pytest.raises(LLMParseError, match="numeric"):
        _validate_mapping(mapping_with_text_qty, headers, first_row)
|
|
|
|
|
|
def test_apply_mapping_builds_parsed_pie():
    """A full mapping yields a ParsedPie with one ParsedPosition per data row.

    Checks the first position's fields and the pie-level invested total.
    """
    from app.services.csv_import import ParsedPie, ParsedPosition
    from app.services.llm_csv_parser import _apply_mapping

    headers = ["Symbol", "Quantity", "Avg Price", "Currency", "Description"]
    apple_row = ["AAPL", "100", "150.25", "USD", "Apple Inc"]
    microsoft_row = ["MSFT", "50", "310.00", "USD", "Microsoft Corp"]
    data_rows = [apple_row, microsoft_row]
    mapping = {
        "ticker_col": "Symbol",
        "qty_col": "Quantity",
        "cost_col": "Avg Price",
        "currency_col": "Currency",
        "name_col": "Description",
    }

    # Expected invested amounts are quantity * average cost.
    apple_invested = 15025.0  # 100 * 150.25
    microsoft_invested = 50 * 310.00

    pie = _apply_mapping(headers, data_rows, mapping)

    assert isinstance(pie, ParsedPie)
    assert len(pie.positions) == 2

    first_position = pie.positions[0]
    assert isinstance(first_position, ParsedPosition)
    assert first_position.slice == "AAPL"
    assert first_position.name == "Apple Inc"
    assert first_position.quantity == 100.0
    assert first_position.invested_value == pytest.approx(apple_invested)

    assert pie.invested == pytest.approx(apple_invested + microsoft_invested)
|
|
|
|
|
|
def test_apply_mapping_handles_missing_optional_columns():
    """With cost, currency and name unmapped, only ticker and quantity are populated.

    The position has no invested value and its name falls back to the ticker.
    """
    from app.services.llm_csv_parser import _apply_mapping

    headers = ["Symbol", "Quantity"]
    data_rows = [["AAPL", "100"]]
    mapping = {
        "ticker_col": "Symbol",
        "qty_col": "Quantity",
        # Optional columns are present as keys but explicitly unmapped (None).
        **dict.fromkeys(("cost_col", "currency_col", "name_col")),
    }

    pie = _apply_mapping(headers, data_rows, mapping)

    only_position = pie.positions[0]
    assert only_position.slice == "AAPL"
    assert only_position.quantity == 100.0
    assert only_position.invested_value is None
    # Without a name column the ticker doubles as the display name.
    assert only_position.name == "AAPL"
|
|
|
|
|
|
def test_apply_mapping_skips_blank_and_unparseable_rows():
    """Blank rows and rows with a non-numeric quantity are left out of the pie."""
    from app.services.llm_csv_parser import _apply_mapping

    headers = ["Symbol", "Quantity"]
    blank_row = ["", ""]
    bad_quantity_row = ["MSFT", "not-a-number"]
    data_rows = [
        ["AAPL", "100"],
        blank_row,
        bad_quantity_row,
        ["NVDA", "40"],
    ]
    mapping = {"ticker_col": "Symbol", "qty_col": "Quantity"}

    pie = _apply_mapping(headers, data_rows, mapping)

    # Only the two well-formed rows survive, in their original order.
    kept_tickers = [position.slice for position in pie.positions]
    assert kept_tickers == ["AAPL", "NVDA"]
|