From a6d686324cc741d63f8344d7747e806d6defaa3b Mon Sep 17 00:00:00 2001
From: Giorgio Gilestro <giorgio@gilest.ro>
Date: Wed, 27 May 2026 21:18:29 +0200
Subject: [PATCH] models: align translation column naming + add token counts

Three recently-added tables (strategic_log_translations,
indicator_summary_translations, csv_format_templates) drifted from
the codebase's existing naming convention:
- llm_model -> model
- llm_cost_usd -> cost_usd
- content_md -> content (on the two translation tables only;
  csv_format_templates has no content column)

Also added nullable prompt_tokens and completion_tokens columns to the
three tables; until now the token counts were silently dropped at
write time despite LogResult exposing them.

All writer call sites (ai_log_job, indicator_summary_job,
llm_csv_parser) and reader call sites (api.py localized helpers)
updated to match. Tests realigned.

Migration 0025 uses batch_alter_table for SQLite compatibility; its
downgrade restores the old column names and drops the two token columns.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 .../0025_align_translation_columns.py         | 79 +++++++++++++++++++
 app/jobs/ai_log_job.py                        |  8 +-
 app/jobs/indicator_summary_job.py             |  8 +-
 app/models.py                                 | 22 ++++--
 app/routers/api.py                            |  6 +-
 app/services/llm_csv_parser.py                |  6 +-
 tests/test_llm_csv_parser.py                  | 14 ++--
 tests/test_localization_integration.py        | 14 ++--
 8 files changed, 125 insertions(+), 32 deletions(-)
 create mode 100644 alembic/versions/0025_align_translation_columns.py

diff --git a/alembic/versions/0025_align_translation_columns.py b/alembic/versions/0025_align_translation_columns.py
new file mode 100644
index 0000000..dbee1d7
--- /dev/null
+++ b/alembic/versions/0025_align_translation_columns.py
@@ -0,0 +1,79 @@
+"""align translation column naming + add token counts.
+
+Revision ID: 0025
+Revises: 0024
+Create Date: 2026-05-27
+"""
+from typing import Sequence, Union
+
+import sqlalchemy as sa
+from alembic import op
+
+
+revision: str = "0025"  # ID of this migration
+down_revision: Union[str, None] = "0024"  # the revision this migration revises
+branch_labels: Union[str, Sequence[str], None] = None
+depends_on: Union[str, Sequence[str], None] = None
+
+
+def upgrade() -> None:  # per table: rename the old columns, add token-count columns
+    # strategic_log_translations: three renames, then two nullable Integer columns
+    with op.batch_alter_table("strategic_log_translations") as bop:
+        bop.alter_column("llm_model", new_column_name="model",
+                         existing_type=sa.String(length=64), existing_nullable=True)
+        bop.alter_column("llm_cost_usd", new_column_name="cost_usd",
+                         existing_type=sa.Float(), existing_nullable=True)
+        bop.alter_column("content_md", new_column_name="content",
+                         existing_type=sa.Text(), existing_nullable=False)
+        bop.add_column(sa.Column("prompt_tokens", sa.Integer(), nullable=True))
+        bop.add_column(sa.Column("completion_tokens", sa.Integer(), nullable=True))
+
+    # indicator_summary_translations: same three renames and two new columns
+    with op.batch_alter_table("indicator_summary_translations") as bop:
+        bop.alter_column("llm_model", new_column_name="model",
+                         existing_type=sa.String(length=64), existing_nullable=True)
+        bop.alter_column("llm_cost_usd", new_column_name="cost_usd",
+                         existing_type=sa.Float(), existing_nullable=True)
+        bop.alter_column("content_md", new_column_name="content",
+                         existing_type=sa.Text(), existing_nullable=False)
+        bop.add_column(sa.Column("prompt_tokens", sa.Integer(), nullable=True))
+        bop.add_column(sa.Column("completion_tokens", sa.Integer(), nullable=True))
+
+    # csv_format_templates: only model/cost_usd are renamed (no content rename here)
+    with op.batch_alter_table("csv_format_templates") as bop:
+        bop.alter_column("llm_model", new_column_name="model",
+                         existing_type=sa.String(length=64), existing_nullable=True)
+        bop.alter_column("llm_cost_usd", new_column_name="cost_usd",
+                         existing_type=sa.Float(), existing_nullable=True)
+        bop.add_column(sa.Column("prompt_tokens", sa.Integer(), nullable=True))
+        bop.add_column(sa.Column("completion_tokens", sa.Integer(), nullable=True))
+
+
+def downgrade() -> None:  # undo upgrade(), visiting the tables in reverse order
+    with op.batch_alter_table("csv_format_templates") as bop:
+        bop.drop_column("completion_tokens")  # stored token counts are discarded
+        bop.drop_column("prompt_tokens")
+        bop.alter_column("cost_usd", new_column_name="llm_cost_usd",
+                         existing_type=sa.Float(), existing_nullable=True)
+        bop.alter_column("model", new_column_name="llm_model",
+                         existing_type=sa.String(length=64), existing_nullable=True)
+
+    with op.batch_alter_table("indicator_summary_translations") as bop:
+        bop.drop_column("completion_tokens")
+        bop.drop_column("prompt_tokens")
+        bop.alter_column("content", new_column_name="content_md",
+                         existing_type=sa.Text(), existing_nullable=False)
+        bop.alter_column("cost_usd", new_column_name="llm_cost_usd",
+                         existing_type=sa.Float(), existing_nullable=True)
+        bop.alter_column("model", new_column_name="llm_model",
+                         existing_type=sa.String(length=64), existing_nullable=True)
+
+    with op.batch_alter_table("strategic_log_translations") as bop:
+        bop.drop_column("completion_tokens")
+        bop.drop_column("prompt_tokens")
+        bop.alter_column("content", new_column_name="content_md",
+                         existing_type=sa.Text(), existing_nullable=False)
+        bop.alter_column("cost_usd", new_column_name="llm_cost_usd",
+                         existing_type=sa.Float(), existing_nullable=True)
+        bop.alter_column("model", new_column_name="llm_model",
+                         existing_type=sa.String(length=64), existing_nullable=True)
diff --git a/app/jobs/ai_log_job.py b/app/jobs/ai_log_job.py
index c0635a7..59da09d 100644
--- a/app/jobs/ai_log_job.py
+++ b/app/jobs/ai_log_job.py
@@ -74,10 +74,12 @@ async def translate_log_for_active_languages(session, log_id: int) -> None:
         translated_md, llm_result = result
         session.add(StrategicLogTranslation(
             log_id=log_id, lang=lang,
-            content_md=translated_md,
+            content=translated_md,
             generated_at=utcnow(),
-            llm_model=llm_result.model,
-            llm_cost_usd=llm_result.cost_usd,
+            model=llm_result.model,
+            prompt_tokens=llm_result.prompt_tokens,
+            completion_tokens=llm_result.completion_tokens,
+            cost_usd=llm_result.cost_usd,
         ))
     await session.commit()
 
diff --git a/app/jobs/indicator_summary_job.py b/app/jobs/indicator_summary_job.py
index 829077b..5f47221 100644
--- a/app/jobs/indicator_summary_job.py
+++ b/app/jobs/indicator_summary_job.py
@@ -77,10 +77,12 @@ async def translate_summary_for_active_languages(session, summary_id: int) -> No
         translated_md, llm_result = result
         session.add(IndicatorSummaryTranslation(
             summary_id=summary_id, lang=lang,
-            content_md=translated_md,
+            content=translated_md,
             generated_at=utcnow(),
-            llm_model=llm_result.model,
-            llm_cost_usd=llm_result.cost_usd,
+            model=llm_result.model,
+            prompt_tokens=llm_result.prompt_tokens,
+            completion_tokens=llm_result.completion_tokens,
+            cost_usd=llm_result.cost_usd,
         ))
     await session.commit()
 
diff --git a/app/models.py b/app/models.py
index 4416501..57c9f19 100644
--- a/app/models.py
+++ b/app/models.py
@@ -141,12 +141,14 @@ class StrategicLogTranslation(Base):
         nullable=False,
     )
     lang: Mapped[str] = mapped_column(String(8), nullable=False)
-    content_md: Mapped[str] = mapped_column(Text, nullable=False)
+    content: Mapped[str] = mapped_column(Text, nullable=False)
     generated_at: Mapped[datetime] = mapped_column(
         DateTime(timezone=True), nullable=False, default=utcnow,
     )
-    llm_model: Mapped[str | None] = mapped_column(String(64))
-    llm_cost_usd: Mapped[float | None] = mapped_column(Float)
+    model: Mapped[str | None] = mapped_column(String(64))
+    prompt_tokens: Mapped[int | None] = mapped_column(Integer)
+    completion_tokens: Mapped[int | None] = mapped_column(Integer)
+    cost_usd: Mapped[float | None] = mapped_column(Float)
 
     __table_args__ = (
         UniqueConstraint("log_id", "lang", name="uq_slt_log_lang"),
@@ -191,12 +193,14 @@ class IndicatorSummaryTranslation(Base):
         nullable=False,
     )
     lang: Mapped[str] = mapped_column(String(8), nullable=False)
-    content_md: Mapped[str] = mapped_column(Text, nullable=False)
+    content: Mapped[str] = mapped_column(Text, nullable=False)
     generated_at: Mapped[datetime] = mapped_column(
         DateTime(timezone=True), nullable=False, default=utcnow,
     )
-    llm_model: Mapped[str | None] = mapped_column(String(64))
-    llm_cost_usd: Mapped[float | None] = mapped_column(Float)
+    model: Mapped[str | None] = mapped_column(String(64))
+    prompt_tokens: Mapped[int | None] = mapped_column(Integer)
+    completion_tokens: Mapped[int | None] = mapped_column(Integer)
+    cost_usd: Mapped[float | None] = mapped_column(Float)
 
     __table_args__ = (
         UniqueConstraint("summary_id", "lang", name="uq_ist_summary_lang"),
@@ -535,5 +539,7 @@ class CsvFormatTemplate(Base):
     last_used_at: Mapped[datetime] = mapped_column(
         DateTime(timezone=True), nullable=False, default=utcnow,
     )
-    llm_model: Mapped[str | None] = mapped_column(String(64))
-    llm_cost_usd: Mapped[float | None] = mapped_column(Float)
+    model: Mapped[str | None] = mapped_column(String(64))
+    prompt_tokens: Mapped[int | None] = mapped_column(Integer)
+    completion_tokens: Mapped[int | None] = mapped_column(Integer)
+    cost_usd: Mapped[float | None] = mapped_column(Float)
diff --git a/app/routers/api.py b/app/routers/api.py
index 30c1c62..10a9f5a 100644
--- a/app/routers/api.py
+++ b/app/routers/api.py
@@ -326,7 +326,7 @@ async def _localized_content(
     row: StrategicLog | None,
     principal: CurrentUser | None,
 ) -> str | None:
-    """Return the translated content_md for ``row`` when the principal has
+    """Return the translated content for ``row`` when the principal has
     a non-English lang preference and a matching translation row exists.
     Returns None to signal 'use row.content as-is' (the default English
     path)."""
@@ -340,7 +340,7 @@ async def _localized_content(
         .where(StrategicLogTranslation.log_id == row.id)
         .where(StrategicLogTranslation.lang == lang)
     )).scalar_one_or_none()
-    return t.content_md if t is not None else None
+    return t.content if t is not None else None
 
 
 async def _apply_localized_summary(
@@ -364,7 +364,7 @@ async def _apply_localized_summary(
         .where(IndicatorSummaryTranslation.lang == lang)
     )).scalar_one_or_none()
     if t is not None:
-        row.content = t.content_md
+        row.content = t.content
 
 
 def _resolve_tone_param(tone: str | None) -> str:
diff --git a/app/services/llm_csv_parser.py b/app/services/llm_csv_parser.py
index 7bb84af..7c7c7a5 100644
--- a/app/services/llm_csv_parser.py
+++ b/app/services/llm_csv_parser.py
@@ -424,8 +424,10 @@ async def parse_with_llm(raw: bytes, session: AsyncSession) -> ParsedPie:
         first_seen_at=now,
         last_used_at=now,
         use_count=1,
-        llm_model=llm_log.model,
-        llm_cost_usd=llm_log.cost_usd,
+        model=llm_log.model,
+        prompt_tokens=llm_log.prompt_tokens,
+        completion_tokens=llm_log.completion_tokens,
+        cost_usd=llm_log.cost_usd,
     ))
     await session.commit()
     return pie
diff --git a/tests/test_llm_csv_parser.py b/tests/test_llm_csv_parser.py
index 15765b3..8d5d42f 100644
--- a/tests/test_llm_csv_parser.py
+++ b/tests/test_llm_csv_parser.py
@@ -22,8 +22,10 @@ def test_csv_format_template_model_columns():
     assert "first_seen_at" in cols
     assert "use_count" in cols
     assert "last_used_at" in cols
-    assert "llm_model" in cols
-    assert "llm_cost_usd" in cols
+    assert "model" in cols
+    assert "cost_usd" in cols
+    assert "prompt_tokens" in cols
+    assert "completion_tokens" in cols
     # Crucially, no user attribution.
     assert "user_id" not in cols
     assert "first_seen_user_id" not in cols
@@ -330,7 +332,7 @@ async def test_parse_with_llm_cache_miss_inserts_template(db_factory):
     assert tmpl.mapping["ticker_col"] == "Symbol"
     assert tmpl.broker_label == "Generic broker"
     assert tmpl.use_count == 1
-    assert tmpl.llm_cost_usd == pytest.approx(0.0002)
+    assert tmpl.cost_usd == pytest.approx(0.0002)
     # The crucial PII guarantee:
     assert not hasattr(tmpl, "user_id"), "sample row must not be linked to a user"
 
@@ -365,8 +367,8 @@ async def test_parse_with_llm_cache_hit_skips_llm(db_factory):
             first_seen_at=utcnow(),
             last_used_at=utcnow(),
             use_count=1,
-            llm_model="seed",
-            llm_cost_usd=0.0,
+            model="seed",
+            cost_usd=0.0,
         ))
         await session.commit()
 
@@ -410,7 +412,7 @@ async def test_parse_with_llm_stale_mapping_raises_but_does_not_evict(db_factory
             mapping={"ticker_col": "Symbol", "qty_col": "Symbol"},
             preamble_rows=0, delimiter=",", broker_label=None,
             first_seen_at=utcnow(), last_used_at=utcnow(), use_count=1,
-            llm_model="seed", llm_cost_usd=0.0,
+            model="seed", cost_usd=0.0,
         ))
         await session.commit()
 
diff --git a/tests/test_localization_integration.py b/tests/test_localization_integration.py
index 6a1ea08..f527d5b 100644
--- a/tests/test_localization_integration.py
+++ b/tests/test_localization_integration.py
@@ -27,13 +27,13 @@ def test_strategic_log_translation_model_columns():
     cols = {c.name: c for c in inspect(StrategicLogTranslation).columns}
     assert "log_id" in cols
     assert "lang" in cols
-    assert "content_md" in cols
+    assert "content" in cols
     assert "generated_at" in cols
-    assert "llm_model" in cols
-    assert "llm_cost_usd" in cols
+    assert "model" in cols
+    assert "cost_usd" in cols
     assert cols["log_id"].nullable is False
     assert cols["lang"].nullable is False
-    assert cols["content_md"].nullable is False
+    assert cols["content"].nullable is False
 
 
 async def test_log_translation_fanout_no_active_non_en_users(db_factory, monkeypatch):
@@ -113,9 +113,9 @@ async def test_log_translation_fanout_italian_user(db_factory, monkeypatch):
     row = rows[0]
     assert row.log_id == log_id
     assert row.lang == "it"
-    assert row.content_md.startswith("# Apertura")
-    assert row.llm_model == "deepseek/deepseek-v4-flash"
-    assert row.llm_cost_usd == pytest.approx(0.00002)
+    assert row.content.startswith("# Apertura")
+    assert row.model == "deepseek/deepseek-v4-flash"
+    assert row.cost_usd == pytest.approx(0.00002)
 
 
 async def test_log_translation_fanout_per_language_failure_isolated(db_factory, monkeypatch):