models: align translation column naming + add token counts

Three recently-added tables (strategic_log_translations, indicator_summary_translations, csv_format_templates) drifted from the codebase's existing naming convention. This commit renames:

- llm_model -> model
- llm_cost_usd -> cost_usd
- content_md -> content (on the two translation tables only; csv_format_templates has no content field)

It also adds prompt_tokens and completion_tokens to all three tables; these values were silently dropped at write time despite LogResult exposing them.

All writer call sites (ai_log_job, indicator_summary_job, llm_csv_parser) and reader call sites (api.py localized helpers) are updated to match, and the tests are realigned. Migration 0025 uses batch_alter_table for SQLite compatibility.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-27 21:18:29 +02:00 · 2026-05-27 21:18:29 +02:00 · a6d686324c
commit a6d686324c
parent e4dc6d0071
8 changed files with 125 additions and 32 deletions
--- a/alembic/versions/0025_align_translation_columns.py
+++ b/alembic/versions/0025_align_translation_columns.py
@ -0,0 +1,79 @@
 """align translation column naming + add token counts.
 Revision ID: 0025
 Revises: 0024
 Create Date: 2026-05-27
 """
 from typing import Sequence, Union
 import sqlalchemy as sa
 from alembic import op
 revision: str = "0025"
 down_revision: Union[str, None] = "0024"
 branch_labels: Union[str, Sequence[str], None] = None
 depends_on: Union[str, Sequence[str], None] = None
 def upgrade() -> None:
    """Rename the LLM bookkeeping columns and add token-count columns.

    Each table is altered inside its own batch operation. Per table:
    ``llm_model`` becomes ``model`` and ``llm_cost_usd`` becomes
    ``cost_usd``; the two translation tables additionally rename
    ``content_md`` to ``content``. Finally two nullable integer columns,
    ``prompt_tokens`` and ``completion_tokens``, are added.
    """
    # (table name, whether the table carries a content_md column to rename)
    targets = (
        ("strategic_log_translations", True),
        ("indicator_summary_translations", True),
        ("csv_format_templates", False),
    )
    for table_name, has_content in targets:
        with op.batch_alter_table(table_name) as batch:
            batch.alter_column(
                "llm_model",
                new_column_name="model",
                existing_type=sa.String(length=64),
                existing_nullable=True,
            )
            batch.alter_column(
                "llm_cost_usd",
                new_column_name="cost_usd",
                existing_type=sa.Float(),
                existing_nullable=True,
            )
            if has_content:
                batch.alter_column(
                    "content_md",
                    new_column_name="content",
                    existing_type=sa.Text(),
                    existing_nullable=False,
                )
            # Nullable so rows written before this revision remain valid.
            for token_column in ("prompt_tokens", "completion_tokens"):
                batch.add_column(
                    sa.Column(token_column, sa.Integer(), nullable=True)
                )
 def downgrade() -> None:
    """Undo the column renames and remove the token-count columns.

    Tables are processed in the reverse order of ``upgrade``. Per table the
    ``completion_tokens`` and ``prompt_tokens`` columns are dropped (any
    values stored in them are discarded), then the columns are renamed
    back: ``content`` to ``content_md`` (translation tables only),
    ``cost_usd`` to ``llm_cost_usd`` and ``model`` to ``llm_model``.
    """
    # (table name, whether the table carries a content column to rename back)
    targets = (
        ("csv_format_templates", False),
        ("indicator_summary_translations", True),
        ("strategic_log_translations", True),
    )
    for table_name, has_content in targets:
        with op.batch_alter_table(table_name) as batch:
            for token_column in ("completion_tokens", "prompt_tokens"):
                batch.drop_column(token_column)
            if has_content:
                batch.alter_column(
                    "content",
                    new_column_name="content_md",
                    existing_type=sa.Text(),
                    existing_nullable=False,
                )
            batch.alter_column(
                "cost_usd",
                new_column_name="llm_cost_usd",
                existing_type=sa.Float(),
                existing_nullable=True,
            )
            batch.alter_column(
                "model",
                new_column_name="llm_model",
                existing_type=sa.String(length=64),
                existing_nullable=True,
            )
--- a/app/jobs/ai_log_job.py
+++ b/app/jobs/ai_log_job.py
@ -74,10 +74,12 @@ async def translate_log_for_active_languages(session, log_id: int) -> None:
        translated_md, llm_result = result
        session.add(StrategicLogTranslation(
            log_id=log_id, lang=lang,
-            content_md=translated_md,
+            content=translated_md,
            generated_at=utcnow(),
-            llm_model=llm_result.model,
+            model=llm_result.model,
-            llm_cost_usd=llm_result.cost_usd,
+            prompt_tokens=llm_result.prompt_tokens,
            completion_tokens=llm_result.completion_tokens,
            cost_usd=llm_result.cost_usd,
        ))
    await session.commit()
--- a/app/jobs/indicator_summary_job.py
+++ b/app/jobs/indicator_summary_job.py
@ -77,10 +77,12 @@ async def translate_summary_for_active_languages(session, summary_id: int) -> No
        translated_md, llm_result = result
        session.add(IndicatorSummaryTranslation(
            summary_id=summary_id, lang=lang,
-            content_md=translated_md,
+            content=translated_md,
            generated_at=utcnow(),
-            llm_model=llm_result.model,
+            model=llm_result.model,
-            llm_cost_usd=llm_result.cost_usd,
+            prompt_tokens=llm_result.prompt_tokens,
            completion_tokens=llm_result.completion_tokens,
            cost_usd=llm_result.cost_usd,
        ))
    await session.commit()
--- a/app/models.py
+++ b/app/models.py
@ -141,12 +141,14 @@ class StrategicLogTranslation(Base):
        nullable=False,
    )
    lang: Mapped[str] = mapped_column(String(8), nullable=False)
-    content_md: Mapped[str] = mapped_column(Text, nullable=False)
+    content: Mapped[str] = mapped_column(Text, nullable=False)
    generated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, default=utcnow,
    )
-    llm_model: Mapped[str | None] = mapped_column(String(64))
+    model: Mapped[str | None] = mapped_column(String(64))
-    llm_cost_usd: Mapped[float | None] = mapped_column(Float)
+    prompt_tokens: Mapped[int | None] = mapped_column(Integer)
    completion_tokens: Mapped[int | None] = mapped_column(Integer)
    cost_usd: Mapped[float | None] = mapped_column(Float)
    __table_args__ = (
        UniqueConstraint("log_id", "lang", name="uq_slt_log_lang"),
@ -191,12 +193,14 @@ class IndicatorSummaryTranslation(Base):
        nullable=False,
    )
    lang: Mapped[str] = mapped_column(String(8), nullable=False)
-    content_md: Mapped[str] = mapped_column(Text, nullable=False)
+    content: Mapped[str] = mapped_column(Text, nullable=False)
    generated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, default=utcnow,
    )
-    llm_model: Mapped[str | None] = mapped_column(String(64))
+    model: Mapped[str | None] = mapped_column(String(64))
-    llm_cost_usd: Mapped[float | None] = mapped_column(Float)
+    prompt_tokens: Mapped[int | None] = mapped_column(Integer)
    completion_tokens: Mapped[int | None] = mapped_column(Integer)
    cost_usd: Mapped[float | None] = mapped_column(Float)
    __table_args__ = (
        UniqueConstraint("summary_id", "lang", name="uq_ist_summary_lang"),
@ -535,5 +539,7 @@ class CsvFormatTemplate(Base):
    last_used_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True), nullable=False, default=utcnow,
    )
-    llm_model: Mapped[str | None] = mapped_column(String(64))
+    model: Mapped[str | None] = mapped_column(String(64))
-    llm_cost_usd: Mapped[float | None] = mapped_column(Float)
+    prompt_tokens: Mapped[int | None] = mapped_column(Integer)
    completion_tokens: Mapped[int | None] = mapped_column(Integer)
    cost_usd: Mapped[float | None] = mapped_column(Float)
--- a/app/routers/api.py
+++ b/app/routers/api.py
@ -326,7 +326,7 @@ async def _localized_content(
    row: StrategicLog | None,
    principal: CurrentUser | None,
 ) -> str | None:
-    """Return the translated content_md for ``row`` when the principal has
+    """Return the translated content for ``row`` when the principal has
    a non-English lang preference and a matching translation row exists.
    Returns None to signal 'use row.content as-is' (the default English
    path)."""
@ -340,7 +340,7 @@ async def _localized_content(
        .where(StrategicLogTranslation.log_id == row.id)
        .where(StrategicLogTranslation.lang == lang)
    )).scalar_one_or_none()
-    return t.content_md if t is not None else None
+    return t.content if t is not None else None
 async def _apply_localized_summary(
@ -364,7 +364,7 @@ async def _apply_localized_summary(
        .where(IndicatorSummaryTranslation.lang == lang)
    )).scalar_one_or_none()
    if t is not None:
-        row.content = t.content_md
+        row.content = t.content
 def _resolve_tone_param(tone: str | None) -> str:
--- a/app/services/llm_csv_parser.py
+++ b/app/services/llm_csv_parser.py
@ -424,8 +424,10 @@ async def parse_with_llm(raw: bytes, session: AsyncSession) -> ParsedPie:
        first_seen_at=now,
        last_used_at=now,
        use_count=1,
-        llm_model=llm_log.model,
+        model=llm_log.model,
-        llm_cost_usd=llm_log.cost_usd,
+        prompt_tokens=llm_log.prompt_tokens,
        completion_tokens=llm_log.completion_tokens,
        cost_usd=llm_log.cost_usd,
    ))
    await session.commit()
    return pie
--- a/tests/test_llm_csv_parser.py
+++ b/tests/test_llm_csv_parser.py
@ -22,8 +22,10 @@ def test_csv_format_template_model_columns():
    assert "first_seen_at" in cols
    assert "use_count" in cols
    assert "last_used_at" in cols
-    assert "llm_model" in cols
+    assert "model" in cols
-    assert "llm_cost_usd" in cols
+    assert "cost_usd" in cols
    assert "prompt_tokens" in cols
    assert "completion_tokens" in cols
    # Crucially, no user attribution.
    assert "user_id" not in cols
    assert "first_seen_user_id" not in cols
@ -330,7 +332,7 @@ async def test_parse_with_llm_cache_miss_inserts_template(db_factory):
    assert tmpl.mapping["ticker_col"] == "Symbol"
    assert tmpl.broker_label == "Generic broker"
    assert tmpl.use_count == 1
-    assert tmpl.llm_cost_usd == pytest.approx(0.0002)
+    assert tmpl.cost_usd == pytest.approx(0.0002)
    # The crucial PII guarantee:
    assert not hasattr(tmpl, "user_id"), "sample row must not be linked to a user"
@ -365,8 +367,8 @@ async def test_parse_with_llm_cache_hit_skips_llm(db_factory):
            first_seen_at=utcnow(),
            last_used_at=utcnow(),
            use_count=1,
-            llm_model="seed",
+            model="seed",
-            llm_cost_usd=0.0,
+            cost_usd=0.0,
        ))
        await session.commit()
@ -410,7 +412,7 @@ async def test_parse_with_llm_stale_mapping_raises_but_does_not_evict(db_factory
            mapping={"ticker_col": "Symbol", "qty_col": "Symbol"},
            preamble_rows=0, delimiter=",", broker_label=None,
            first_seen_at=utcnow(), last_used_at=utcnow(), use_count=1,
-            llm_model="seed", llm_cost_usd=0.0,
+            model="seed", cost_usd=0.0,
        ))
        await session.commit()
--- a/tests/test_localization_integration.py
+++ b/tests/test_localization_integration.py
@ -27,13 +27,13 @@ def test_strategic_log_translation_model_columns():
    cols = {c.name: c for c in inspect(StrategicLogTranslation).columns}
    assert "log_id" in cols
    assert "lang" in cols
-    assert "content_md" in cols
+    assert "content" in cols
    assert "generated_at" in cols
-    assert "llm_model" in cols
+    assert "model" in cols
-    assert "llm_cost_usd" in cols
+    assert "cost_usd" in cols
    assert cols["log_id"].nullable is False
    assert cols["lang"].nullable is False
-    assert cols["content_md"].nullable is False
+    assert cols["content"].nullable is False
 async def test_log_translation_fanout_no_active_non_en_users(db_factory, monkeypatch):
@ -113,9 +113,9 @@ async def test_log_translation_fanout_italian_user(db_factory, monkeypatch):
    row = rows[0]
    assert row.log_id == log_id
    assert row.lang == "it"
-    assert row.content_md.startswith("# Apertura")
+    assert row.content.startswith("# Apertura")
-    assert row.llm_model == "deepseek/deepseek-v4-flash"
+    assert row.model == "deepseek/deepseek-v4-flash"
-    assert row.llm_cost_usd == pytest.approx(0.00002)
+    assert row.cost_usd == pytest.approx(0.00002)
 async def test_log_translation_fanout_per_language_failure_isolated(db_factory, monkeypatch):