csv-parser: add CsvFormatTemplate model
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
08b4dddcdd
commit
3f1d2a1034
2 changed files with 66 additions and 0 deletions
|
|
@ -426,3 +426,38 @@ class StripeEvent(Base):
|
||||||
UniqueConstraint("event_id", name="uq_stripe_events_event_id"),
|
UniqueConstraint("event_id", name="uq_stripe_events_event_id"),
|
||||||
Index("ix_stripe_events_type_received", "event_type", "received_at"),
|
Index("ix_stripe_events_type_received", "event_type", "received_at"),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class CsvFormatTemplate(Base):
    """Cached column-mapping for a single broker CSV format.

    Populated on the first upload of a previously-unseen format via the
    LLM-fallback parser. Subsequent uploads of the same format
    (identified by ``fingerprint``, a sha256 of the normalised header
    row) replay ``mapping`` deterministically with no LLM call.

    The table holds the actual ``headers`` and one anonymous ``sample_row``
    from the originating upload — there is no ``user_id`` column, no link
    back to the uploader. The sample exists so the operator has concrete
    material to look at when hand-writing future native parsers; the
    system never auto-generates or modifies parser code from this data.
    """

    __tablename__ = "csv_format_templates"

    # Surrogate key; ``_PK`` is the primary-key column type defined elsewhere
    # in this module.
    id: Mapped[int] = mapped_column(_PK, primary_key=True, autoincrement=True)

    # Cache key: sha256 of the normalised header row. The 64-character limit
    # fits a hex-encoded digest, and the unique constraint gives one template
    # per format.
    fingerprint: Mapped[str] = mapped_column(String(64), unique=True, nullable=False)

    # Header row and a single anonymous data row from the originating upload.
    headers: Mapped[list] = mapped_column(JSON, nullable=False)
    sample_row: Mapped[list] = mapped_column(JSON, nullable=False)

    # Column mapping replayed on later uploads of the same format.
    # NOTE(review): the key/value schema is not visible here — confirm
    # against the parser that writes it.
    mapping: Mapped[dict] = mapped_column(JSON, nullable=False)

    # Presumably the number of rows preceding the header row — TODO confirm
    # against the parser. Defaults to 0.
    preamble_rows: Mapped[int] = mapped_column(Integer, nullable=False, default=0)

    # Single-character field delimiter; defaults to a comma.
    delimiter: Mapped[str] = mapped_column(String(1), nullable=False, default=",")

    # Optional label for the broker; nullable.
    broker_label: Mapped[str | None] = mapped_column(String(128))

    # Timezone-aware timestamp filled by ``utcnow`` when the row is inserted.
    first_seen_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, default=utcnow,
    )

    # Starts at 1 on insert.
    use_count: Mapped[int] = mapped_column(Integer, nullable=False, default=1)

    # Filled by ``utcnow`` on insert only. No ``onupdate`` is configured, so
    # it is not refreshed automatically when the row is updated.
    last_used_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, default=utcnow,
    )

    # Optional LLM bookkeeping; both columns are nullable. Presumably the
    # model name and the cost (in USD) of the call that produced ``mapping``
    # — TODO confirm.
    llm_model: Mapped[str | None] = mapped_column(String(64))
    llm_cost_usd: Mapped[float | None] = mapped_column(Float)
|
||||||
|
|
|
||||||
31
tests/test_llm_csv_parser.py
Normal file
31
tests/test_llm_csv_parser.py
Normal file
|
|
@ -0,0 +1,31 @@
|
||||||
|
"""Unit + integration tests for the LLM-fallback CSV parser."""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
|
def test_csv_format_template_model_columns():
    """Model exposes every required column and carries no user attribution.

    Checks column presence, the absence of user-identifying columns, and
    that ``fingerprint`` is declared unique and non-nullable. Column types
    are not checked here.
    """
    from sqlalchemy import inspect

    from app.models import CsvFormatTemplate

    cols = {c.name: c for c in inspect(CsvFormatTemplate).columns}

    required = {
        "fingerprint",
        "headers",
        "sample_row",
        "mapping",
        "preamble_rows",
        "delimiter",
        "broker_label",
        "first_seen_at",
        "use_count",
        "last_used_at",
        "llm_model",
        "llm_cost_usd",
    }
    # A set difference reports every missing column in one failure instead
    # of stopping at the first.
    missing = required - cols.keys()
    assert not missing, f"missing columns: {sorted(missing)}"

    # Crucially, no user attribution.
    forbidden = {"user_id", "first_seen_user_id"}
    leaked = forbidden & cols.keys()
    assert not leaked, f"unexpected user-attribution columns: {sorted(leaked)}"

    # Fingerprint is the cache key.
    assert cols["fingerprint"].unique is True
    assert cols["fingerprint"].nullable is False
|
||||||
Loading…
Add table
Add a link
Reference in a new issue