models: align translation column naming + add token counts

Three recently-added tables (strategic_log_translations,
indicator_summary_translations, csv_format_templates) drifted from
the codebase's existing naming convention:
- llm_model -> model
- llm_cost_usd -> cost_usd
- content_md -> content  (on the two translation tables only;
  csv_format_templates has no content column)

Also added prompt_tokens and completion_tokens to the three tables;
they were silently dropped at write time despite LogResult exposing
them.

All writer call sites (ai_log_job, indicator_summary_job,
llm_csv_parser) and reader call sites (api.py localized helpers)
updated to match. Tests realigned.

Migration 0025 uses batch_alter_table for SQLite compatibility.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Giorgio Gilestro 2026-05-27 21:18:29 +02:00
parent e4dc6d0071
commit a6d686324c
8 changed files with 125 additions and 32 deletions

View file

@ -0,0 +1,79 @@
"""align translation column naming + add token counts.
Revision ID: 0025
Revises: 0024
Create Date: 2026-05-27
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# Alembic revision identifiers: this migration is "0025" and applies on top
# of "0024".
revision: str = "0025"
down_revision: Union[str, None] = "0024"
# No branch labels and no cross-revision dependencies are declared.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Rename the LLM bookkeeping columns and add token-count columns.

    For each of the three tables:

    * ``llm_model``    -> ``model``
    * ``llm_cost_usd`` -> ``cost_usd``
    * ``content_md``   -> ``content`` (only on the two translation tables)
    * add nullable integer columns ``prompt_tokens`` and
      ``completion_tokens``

    Every table is altered inside its own ``batch_alter_table`` context.
    """
    # (table name, whether the table has a ``content_md`` column to rename)
    targets = (
        ("strategic_log_translations", True),
        ("indicator_summary_translations", True),
        ("csv_format_templates", False),
    )

    for table_name, has_content in targets:
        with op.batch_alter_table(table_name) as batch:
            batch.alter_column(
                "llm_model",
                new_column_name="model",
                existing_type=sa.String(length=64),
                existing_nullable=True,
            )
            batch.alter_column(
                "llm_cost_usd",
                new_column_name="cost_usd",
                existing_type=sa.Float(),
                existing_nullable=True,
            )
            if has_content:
                batch.alter_column(
                    "content_md",
                    new_column_name="content",
                    existing_type=sa.Text(),
                    existing_nullable=False,
                )
            # New columns are nullable, so no value is required for rows
            # that already exist.
            for token_column in ("prompt_tokens", "completion_tokens"):
                batch.add_column(
                    sa.Column(token_column, sa.Integer(), nullable=True)
                )
def downgrade() -> None:
    """Reverse ``upgrade``: drop the token columns and restore the old names.

    Tables are processed in the opposite order to ``upgrade``. For each one
    the ``completion_tokens`` and ``prompt_tokens`` columns are dropped
    (their values are discarded), then the renames are undone:

    * ``content``  -> ``content_md`` (only on the two translation tables)
    * ``cost_usd`` -> ``llm_cost_usd``
    * ``model``    -> ``llm_model``
    """
    # (table name, whether the table has a ``content`` column to rename back)
    restore_plan = (
        ("csv_format_templates", False),
        ("indicator_summary_translations", True),
        ("strategic_log_translations", True),
    )

    for table_name, has_content in restore_plan:
        with op.batch_alter_table(table_name) as batch:
            for token_column in ("completion_tokens", "prompt_tokens"):
                batch.drop_column(token_column)
            if has_content:
                batch.alter_column(
                    "content",
                    new_column_name="content_md",
                    existing_type=sa.Text(),
                    existing_nullable=False,
                )
            batch.alter_column(
                "cost_usd",
                new_column_name="llm_cost_usd",
                existing_type=sa.Float(),
                existing_nullable=True,
            )
            batch.alter_column(
                "model",
                new_column_name="llm_model",
                existing_type=sa.String(length=64),
                existing_nullable=True,
            )

View file

@ -74,10 +74,12 @@ async def translate_log_for_active_languages(session, log_id: int) -> None:
translated_md, llm_result = result translated_md, llm_result = result
session.add(StrategicLogTranslation( session.add(StrategicLogTranslation(
log_id=log_id, lang=lang, log_id=log_id, lang=lang,
content_md=translated_md, content=translated_md,
generated_at=utcnow(), generated_at=utcnow(),
llm_model=llm_result.model, model=llm_result.model,
llm_cost_usd=llm_result.cost_usd, prompt_tokens=llm_result.prompt_tokens,
completion_tokens=llm_result.completion_tokens,
cost_usd=llm_result.cost_usd,
)) ))
await session.commit() await session.commit()

View file

@ -77,10 +77,12 @@ async def translate_summary_for_active_languages(session, summary_id: int) -> No
translated_md, llm_result = result translated_md, llm_result = result
session.add(IndicatorSummaryTranslation( session.add(IndicatorSummaryTranslation(
summary_id=summary_id, lang=lang, summary_id=summary_id, lang=lang,
content_md=translated_md, content=translated_md,
generated_at=utcnow(), generated_at=utcnow(),
llm_model=llm_result.model, model=llm_result.model,
llm_cost_usd=llm_result.cost_usd, prompt_tokens=llm_result.prompt_tokens,
completion_tokens=llm_result.completion_tokens,
cost_usd=llm_result.cost_usd,
)) ))
await session.commit() await session.commit()

View file

@ -141,12 +141,14 @@ class StrategicLogTranslation(Base):
nullable=False, nullable=False,
) )
lang: Mapped[str] = mapped_column(String(8), nullable=False) lang: Mapped[str] = mapped_column(String(8), nullable=False)
content_md: Mapped[str] = mapped_column(Text, nullable=False) content: Mapped[str] = mapped_column(Text, nullable=False)
generated_at: Mapped[datetime] = mapped_column( generated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), nullable=False, default=utcnow, DateTime(timezone=True), nullable=False, default=utcnow,
) )
llm_model: Mapped[str | None] = mapped_column(String(64)) model: Mapped[str | None] = mapped_column(String(64))
llm_cost_usd: Mapped[float | None] = mapped_column(Float) prompt_tokens: Mapped[int | None] = mapped_column(Integer)
completion_tokens: Mapped[int | None] = mapped_column(Integer)
cost_usd: Mapped[float | None] = mapped_column(Float)
__table_args__ = ( __table_args__ = (
UniqueConstraint("log_id", "lang", name="uq_slt_log_lang"), UniqueConstraint("log_id", "lang", name="uq_slt_log_lang"),
@ -191,12 +193,14 @@ class IndicatorSummaryTranslation(Base):
nullable=False, nullable=False,
) )
lang: Mapped[str] = mapped_column(String(8), nullable=False) lang: Mapped[str] = mapped_column(String(8), nullable=False)
content_md: Mapped[str] = mapped_column(Text, nullable=False) content: Mapped[str] = mapped_column(Text, nullable=False)
generated_at: Mapped[datetime] = mapped_column( generated_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), nullable=False, default=utcnow, DateTime(timezone=True), nullable=False, default=utcnow,
) )
llm_model: Mapped[str | None] = mapped_column(String(64)) model: Mapped[str | None] = mapped_column(String(64))
llm_cost_usd: Mapped[float | None] = mapped_column(Float) prompt_tokens: Mapped[int | None] = mapped_column(Integer)
completion_tokens: Mapped[int | None] = mapped_column(Integer)
cost_usd: Mapped[float | None] = mapped_column(Float)
__table_args__ = ( __table_args__ = (
UniqueConstraint("summary_id", "lang", name="uq_ist_summary_lang"), UniqueConstraint("summary_id", "lang", name="uq_ist_summary_lang"),
@ -535,5 +539,7 @@ class CsvFormatTemplate(Base):
last_used_at: Mapped[datetime] = mapped_column( last_used_at: Mapped[datetime] = mapped_column(
DateTime(timezone=True), nullable=False, default=utcnow, DateTime(timezone=True), nullable=False, default=utcnow,
) )
llm_model: Mapped[str | None] = mapped_column(String(64)) model: Mapped[str | None] = mapped_column(String(64))
llm_cost_usd: Mapped[float | None] = mapped_column(Float) prompt_tokens: Mapped[int | None] = mapped_column(Integer)
completion_tokens: Mapped[int | None] = mapped_column(Integer)
cost_usd: Mapped[float | None] = mapped_column(Float)

View file

@ -326,7 +326,7 @@ async def _localized_content(
row: StrategicLog | None, row: StrategicLog | None,
principal: CurrentUser | None, principal: CurrentUser | None,
) -> str | None: ) -> str | None:
"""Return the translated content_md for ``row`` when the principal has """Return the translated content for ``row`` when the principal has
a non-English lang preference and a matching translation row exists. a non-English lang preference and a matching translation row exists.
Returns None to signal 'use row.content as-is' (the default English Returns None to signal 'use row.content as-is' (the default English
path).""" path)."""
@ -340,7 +340,7 @@ async def _localized_content(
.where(StrategicLogTranslation.log_id == row.id) .where(StrategicLogTranslation.log_id == row.id)
.where(StrategicLogTranslation.lang == lang) .where(StrategicLogTranslation.lang == lang)
)).scalar_one_or_none() )).scalar_one_or_none()
return t.content_md if t is not None else None return t.content if t is not None else None
async def _apply_localized_summary( async def _apply_localized_summary(
@ -364,7 +364,7 @@ async def _apply_localized_summary(
.where(IndicatorSummaryTranslation.lang == lang) .where(IndicatorSummaryTranslation.lang == lang)
)).scalar_one_or_none() )).scalar_one_or_none()
if t is not None: if t is not None:
row.content = t.content_md row.content = t.content
def _resolve_tone_param(tone: str | None) -> str: def _resolve_tone_param(tone: str | None) -> str:

View file

@ -424,8 +424,10 @@ async def parse_with_llm(raw: bytes, session: AsyncSession) -> ParsedPie:
first_seen_at=now, first_seen_at=now,
last_used_at=now, last_used_at=now,
use_count=1, use_count=1,
llm_model=llm_log.model, model=llm_log.model,
llm_cost_usd=llm_log.cost_usd, prompt_tokens=llm_log.prompt_tokens,
completion_tokens=llm_log.completion_tokens,
cost_usd=llm_log.cost_usd,
)) ))
await session.commit() await session.commit()
return pie return pie

View file

@ -22,8 +22,10 @@ def test_csv_format_template_model_columns():
assert "first_seen_at" in cols assert "first_seen_at" in cols
assert "use_count" in cols assert "use_count" in cols
assert "last_used_at" in cols assert "last_used_at" in cols
assert "llm_model" in cols assert "model" in cols
assert "llm_cost_usd" in cols assert "cost_usd" in cols
assert "prompt_tokens" in cols
assert "completion_tokens" in cols
# Crucially, no user attribution. # Crucially, no user attribution.
assert "user_id" not in cols assert "user_id" not in cols
assert "first_seen_user_id" not in cols assert "first_seen_user_id" not in cols
@ -330,7 +332,7 @@ async def test_parse_with_llm_cache_miss_inserts_template(db_factory):
assert tmpl.mapping["ticker_col"] == "Symbol" assert tmpl.mapping["ticker_col"] == "Symbol"
assert tmpl.broker_label == "Generic broker" assert tmpl.broker_label == "Generic broker"
assert tmpl.use_count == 1 assert tmpl.use_count == 1
assert tmpl.llm_cost_usd == pytest.approx(0.0002) assert tmpl.cost_usd == pytest.approx(0.0002)
# The crucial PII guarantee: # The crucial PII guarantee:
assert not hasattr(tmpl, "user_id"), "sample row must not be linked to a user" assert not hasattr(tmpl, "user_id"), "sample row must not be linked to a user"
@ -365,8 +367,8 @@ async def test_parse_with_llm_cache_hit_skips_llm(db_factory):
first_seen_at=utcnow(), first_seen_at=utcnow(),
last_used_at=utcnow(), last_used_at=utcnow(),
use_count=1, use_count=1,
llm_model="seed", model="seed",
llm_cost_usd=0.0, cost_usd=0.0,
)) ))
await session.commit() await session.commit()
@ -410,7 +412,7 @@ async def test_parse_with_llm_stale_mapping_raises_but_does_not_evict(db_factory
mapping={"ticker_col": "Symbol", "qty_col": "Symbol"}, mapping={"ticker_col": "Symbol", "qty_col": "Symbol"},
preamble_rows=0, delimiter=",", broker_label=None, preamble_rows=0, delimiter=",", broker_label=None,
first_seen_at=utcnow(), last_used_at=utcnow(), use_count=1, first_seen_at=utcnow(), last_used_at=utcnow(), use_count=1,
llm_model="seed", llm_cost_usd=0.0, model="seed", cost_usd=0.0,
)) ))
await session.commit() await session.commit()

View file

@ -27,13 +27,13 @@ def test_strategic_log_translation_model_columns():
cols = {c.name: c for c in inspect(StrategicLogTranslation).columns} cols = {c.name: c for c in inspect(StrategicLogTranslation).columns}
assert "log_id" in cols assert "log_id" in cols
assert "lang" in cols assert "lang" in cols
assert "content_md" in cols assert "content" in cols
assert "generated_at" in cols assert "generated_at" in cols
assert "llm_model" in cols assert "model" in cols
assert "llm_cost_usd" in cols assert "cost_usd" in cols
assert cols["log_id"].nullable is False assert cols["log_id"].nullable is False
assert cols["lang"].nullable is False assert cols["lang"].nullable is False
assert cols["content_md"].nullable is False assert cols["content"].nullable is False
async def test_log_translation_fanout_no_active_non_en_users(db_factory, monkeypatch): async def test_log_translation_fanout_no_active_non_en_users(db_factory, monkeypatch):
@ -113,9 +113,9 @@ async def test_log_translation_fanout_italian_user(db_factory, monkeypatch):
row = rows[0] row = rows[0]
assert row.log_id == log_id assert row.log_id == log_id
assert row.lang == "it" assert row.lang == "it"
assert row.content_md.startswith("# Apertura") assert row.content.startswith("# Apertura")
assert row.llm_model == "deepseek/deepseek-v4-flash" assert row.model == "deepseek/deepseek-v4-flash"
assert row.llm_cost_usd == pytest.approx(0.00002) assert row.cost_usd == pytest.approx(0.00002)
async def test_log_translation_fanout_per_language_failure_isolated(db_factory, monkeypatch): async def test_log_translation_fanout_per_language_failure_isolated(db_factory, monkeypatch):