From a6d686324cc741d63f8344d7747e806d6defaa3b Mon Sep 17 00:00:00 2001 From: Giorgio Gilestro Date: Wed, 27 May 2026 21:18:29 +0200 Subject: [PATCH] models: align translation column naming + add token counts Three recently-added tables (strategic_log_translations, indicator_summary_translations, csv_format_templates) drifted from the codebase's existing naming convention: - llm_model -> model - llm_cost_usd -> cost_usd - content_md -> content (on the two translation tables; csv_format_templates has no content column) Also added prompt_tokens and completion_tokens to the three tables; they were silently dropped at write time despite LogResult exposing them. All writer call sites (ai_log_job, indicator_summary_job, llm_csv_parser) and reader call sites (api.py localized helpers) updated to match. Tests realigned. Migration 0025 uses batch_alter_table for SQLite compatibility. Co-Authored-By: Claude Opus 4.7 --- .../0025_align_translation_columns.py | 79 +++++++++++++++++++ app/jobs/ai_log_job.py | 8 +- app/jobs/indicator_summary_job.py | 8 +- app/models.py | 22 ++++-- app/routers/api.py | 6 +- app/services/llm_csv_parser.py | 6 +- tests/test_llm_csv_parser.py | 14 ++-- tests/test_localization_integration.py | 14 ++-- 8 files changed, 125 insertions(+), 32 deletions(-) create mode 100644 alembic/versions/0025_align_translation_columns.py diff --git a/alembic/versions/0025_align_translation_columns.py b/alembic/versions/0025_align_translation_columns.py new file mode 100644 index 0000000..dbee1d7 --- /dev/null +++ b/alembic/versions/0025_align_translation_columns.py @@ -0,0 +1,79 @@ +"""align translation column naming + add token counts. 
+ +Revision ID: 0025 +Revises: 0024 +Create Date: 2026-05-27 +""" +from typing import Sequence, Union + +import sqlalchemy as sa +from alembic import op + + +revision: str = "0025" +down_revision: Union[str, None] = "0024" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # strategic_log_translations + with op.batch_alter_table("strategic_log_translations") as bop: + bop.alter_column("llm_model", new_column_name="model", + existing_type=sa.String(length=64), existing_nullable=True) + bop.alter_column("llm_cost_usd", new_column_name="cost_usd", + existing_type=sa.Float(), existing_nullable=True) + bop.alter_column("content_md", new_column_name="content", + existing_type=sa.Text(), existing_nullable=False) + bop.add_column(sa.Column("prompt_tokens", sa.Integer(), nullable=True)) + bop.add_column(sa.Column("completion_tokens", sa.Integer(), nullable=True)) + + # indicator_summary_translations + with op.batch_alter_table("indicator_summary_translations") as bop: + bop.alter_column("llm_model", new_column_name="model", + existing_type=sa.String(length=64), existing_nullable=True) + bop.alter_column("llm_cost_usd", new_column_name="cost_usd", + existing_type=sa.Float(), existing_nullable=True) + bop.alter_column("content_md", new_column_name="content", + existing_type=sa.Text(), existing_nullable=False) + bop.add_column(sa.Column("prompt_tokens", sa.Integer(), nullable=True)) + bop.add_column(sa.Column("completion_tokens", sa.Integer(), nullable=True)) + + # csv_format_templates + with op.batch_alter_table("csv_format_templates") as bop: + bop.alter_column("llm_model", new_column_name="model", + existing_type=sa.String(length=64), existing_nullable=True) + bop.alter_column("llm_cost_usd", new_column_name="cost_usd", + existing_type=sa.Float(), existing_nullable=True) + bop.add_column(sa.Column("prompt_tokens", sa.Integer(), nullable=True)) + 
bop.add_column(sa.Column("completion_tokens", sa.Integer(), nullable=True)) + + +def downgrade() -> None: + with op.batch_alter_table("csv_format_templates") as bop: + bop.drop_column("completion_tokens") + bop.drop_column("prompt_tokens") + bop.alter_column("cost_usd", new_column_name="llm_cost_usd", + existing_type=sa.Float(), existing_nullable=True) + bop.alter_column("model", new_column_name="llm_model", + existing_type=sa.String(length=64), existing_nullable=True) + + with op.batch_alter_table("indicator_summary_translations") as bop: + bop.drop_column("completion_tokens") + bop.drop_column("prompt_tokens") + bop.alter_column("content", new_column_name="content_md", + existing_type=sa.Text(), existing_nullable=False) + bop.alter_column("cost_usd", new_column_name="llm_cost_usd", + existing_type=sa.Float(), existing_nullable=True) + bop.alter_column("model", new_column_name="llm_model", + existing_type=sa.String(length=64), existing_nullable=True) + + with op.batch_alter_table("strategic_log_translations") as bop: + bop.drop_column("completion_tokens") + bop.drop_column("prompt_tokens") + bop.alter_column("content", new_column_name="content_md", + existing_type=sa.Text(), existing_nullable=False) + bop.alter_column("cost_usd", new_column_name="llm_cost_usd", + existing_type=sa.Float(), existing_nullable=True) + bop.alter_column("model", new_column_name="llm_model", + existing_type=sa.String(length=64), existing_nullable=True) diff --git a/app/jobs/ai_log_job.py b/app/jobs/ai_log_job.py index c0635a7..59da09d 100644 --- a/app/jobs/ai_log_job.py +++ b/app/jobs/ai_log_job.py @@ -74,10 +74,12 @@ async def translate_log_for_active_languages(session, log_id: int) -> None: translated_md, llm_result = result session.add(StrategicLogTranslation( log_id=log_id, lang=lang, - content_md=translated_md, + content=translated_md, generated_at=utcnow(), - llm_model=llm_result.model, - llm_cost_usd=llm_result.cost_usd, + model=llm_result.model, + 
prompt_tokens=llm_result.prompt_tokens, + completion_tokens=llm_result.completion_tokens, + cost_usd=llm_result.cost_usd, )) await session.commit() diff --git a/app/jobs/indicator_summary_job.py b/app/jobs/indicator_summary_job.py index 829077b..5f47221 100644 --- a/app/jobs/indicator_summary_job.py +++ b/app/jobs/indicator_summary_job.py @@ -77,10 +77,12 @@ async def translate_summary_for_active_languages(session, summary_id: int) -> No translated_md, llm_result = result session.add(IndicatorSummaryTranslation( summary_id=summary_id, lang=lang, - content_md=translated_md, + content=translated_md, generated_at=utcnow(), - llm_model=llm_result.model, - llm_cost_usd=llm_result.cost_usd, + model=llm_result.model, + prompt_tokens=llm_result.prompt_tokens, + completion_tokens=llm_result.completion_tokens, + cost_usd=llm_result.cost_usd, )) await session.commit() diff --git a/app/models.py b/app/models.py index 4416501..57c9f19 100644 --- a/app/models.py +++ b/app/models.py @@ -141,12 +141,14 @@ class StrategicLogTranslation(Base): nullable=False, ) lang: Mapped[str] = mapped_column(String(8), nullable=False) - content_md: Mapped[str] = mapped_column(Text, nullable=False) + content: Mapped[str] = mapped_column(Text, nullable=False) generated_at: Mapped[datetime] = mapped_column( DateTime(timezone=True), nullable=False, default=utcnow, ) - llm_model: Mapped[str | None] = mapped_column(String(64)) - llm_cost_usd: Mapped[float | None] = mapped_column(Float) + model: Mapped[str | None] = mapped_column(String(64)) + prompt_tokens: Mapped[int | None] = mapped_column(Integer) + completion_tokens: Mapped[int | None] = mapped_column(Integer) + cost_usd: Mapped[float | None] = mapped_column(Float) __table_args__ = ( UniqueConstraint("log_id", "lang", name="uq_slt_log_lang"), @@ -191,12 +193,14 @@ class IndicatorSummaryTranslation(Base): nullable=False, ) lang: Mapped[str] = mapped_column(String(8), nullable=False) - content_md: Mapped[str] = mapped_column(Text, nullable=False) + 
content: Mapped[str] = mapped_column(Text, nullable=False) generated_at: Mapped[datetime] = mapped_column( DateTime(timezone=True), nullable=False, default=utcnow, ) - llm_model: Mapped[str | None] = mapped_column(String(64)) - llm_cost_usd: Mapped[float | None] = mapped_column(Float) + model: Mapped[str | None] = mapped_column(String(64)) + prompt_tokens: Mapped[int | None] = mapped_column(Integer) + completion_tokens: Mapped[int | None] = mapped_column(Integer) + cost_usd: Mapped[float | None] = mapped_column(Float) __table_args__ = ( UniqueConstraint("summary_id", "lang", name="uq_ist_summary_lang"), @@ -535,5 +539,7 @@ class CsvFormatTemplate(Base): last_used_at: Mapped[datetime] = mapped_column( DateTime(timezone=True), nullable=False, default=utcnow, ) - llm_model: Mapped[str | None] = mapped_column(String(64)) - llm_cost_usd: Mapped[float | None] = mapped_column(Float) + model: Mapped[str | None] = mapped_column(String(64)) + prompt_tokens: Mapped[int | None] = mapped_column(Integer) + completion_tokens: Mapped[int | None] = mapped_column(Integer) + cost_usd: Mapped[float | None] = mapped_column(Float) diff --git a/app/routers/api.py b/app/routers/api.py index 30c1c62..10a9f5a 100644 --- a/app/routers/api.py +++ b/app/routers/api.py @@ -326,7 +326,7 @@ async def _localized_content( row: StrategicLog | None, principal: CurrentUser | None, ) -> str | None: - """Return the translated content_md for ``row`` when the principal has + """Return the translated content for ``row`` when the principal has a non-English lang preference and a matching translation row exists. 
Returns None to signal 'use row.content as-is' (the default English path).""" @@ -340,7 +340,7 @@ async def _localized_content( .where(StrategicLogTranslation.log_id == row.id) .where(StrategicLogTranslation.lang == lang) )).scalar_one_or_none() - return t.content_md if t is not None else None + return t.content if t is not None else None async def _apply_localized_summary( @@ -364,7 +364,7 @@ async def _apply_localized_summary( .where(IndicatorSummaryTranslation.lang == lang) )).scalar_one_or_none() if t is not None: - row.content = t.content_md + row.content = t.content def _resolve_tone_param(tone: str | None) -> str: diff --git a/app/services/llm_csv_parser.py b/app/services/llm_csv_parser.py index 7bb84af..7c7c7a5 100644 --- a/app/services/llm_csv_parser.py +++ b/app/services/llm_csv_parser.py @@ -424,8 +424,10 @@ async def parse_with_llm(raw: bytes, session: AsyncSession) -> ParsedPie: first_seen_at=now, last_used_at=now, use_count=1, - llm_model=llm_log.model, - llm_cost_usd=llm_log.cost_usd, + model=llm_log.model, + prompt_tokens=llm_log.prompt_tokens, + completion_tokens=llm_log.completion_tokens, + cost_usd=llm_log.cost_usd, )) await session.commit() return pie diff --git a/tests/test_llm_csv_parser.py b/tests/test_llm_csv_parser.py index 15765b3..8d5d42f 100644 --- a/tests/test_llm_csv_parser.py +++ b/tests/test_llm_csv_parser.py @@ -22,8 +22,10 @@ def test_csv_format_template_model_columns(): assert "first_seen_at" in cols assert "use_count" in cols assert "last_used_at" in cols - assert "llm_model" in cols - assert "llm_cost_usd" in cols + assert "model" in cols + assert "cost_usd" in cols + assert "prompt_tokens" in cols + assert "completion_tokens" in cols # Crucially, no user attribution. 
assert "user_id" not in cols assert "first_seen_user_id" not in cols @@ -330,7 +332,7 @@ async def test_parse_with_llm_cache_miss_inserts_template(db_factory): assert tmpl.mapping["ticker_col"] == "Symbol" assert tmpl.broker_label == "Generic broker" assert tmpl.use_count == 1 - assert tmpl.llm_cost_usd == pytest.approx(0.0002) + assert tmpl.cost_usd == pytest.approx(0.0002) # The crucial PII guarantee: assert not hasattr(tmpl, "user_id"), "sample row must not be linked to a user" @@ -365,8 +367,8 @@ async def test_parse_with_llm_cache_hit_skips_llm(db_factory): first_seen_at=utcnow(), last_used_at=utcnow(), use_count=1, - llm_model="seed", - llm_cost_usd=0.0, + model="seed", + cost_usd=0.0, )) await session.commit() @@ -410,7 +412,7 @@ async def test_parse_with_llm_stale_mapping_raises_but_does_not_evict(db_factory mapping={"ticker_col": "Symbol", "qty_col": "Symbol"}, preamble_rows=0, delimiter=",", broker_label=None, first_seen_at=utcnow(), last_used_at=utcnow(), use_count=1, - llm_model="seed", llm_cost_usd=0.0, + model="seed", cost_usd=0.0, )) await session.commit() diff --git a/tests/test_localization_integration.py b/tests/test_localization_integration.py index 6a1ea08..f527d5b 100644 --- a/tests/test_localization_integration.py +++ b/tests/test_localization_integration.py @@ -27,13 +27,13 @@ def test_strategic_log_translation_model_columns(): cols = {c.name: c for c in inspect(StrategicLogTranslation).columns} assert "log_id" in cols assert "lang" in cols - assert "content_md" in cols + assert "content" in cols assert "generated_at" in cols - assert "llm_model" in cols - assert "llm_cost_usd" in cols + assert "model" in cols + assert "cost_usd" in cols assert cols["log_id"].nullable is False assert cols["lang"].nullable is False - assert cols["content_md"].nullable is False + assert cols["content"].nullable is False async def test_log_translation_fanout_no_active_non_en_users(db_factory, monkeypatch): @@ -113,9 +113,9 @@ async def 
test_log_translation_fanout_italian_user(db_factory, monkeypatch): row = rows[0] assert row.log_id == log_id assert row.lang == "it" - assert row.content_md.startswith("# Apertura") - assert row.llm_model == "deepseek/deepseek-v4-flash" - assert row.llm_cost_usd == pytest.approx(0.00002) + assert row.content.startswith("# Apertura") + assert row.model == "deepseek/deepseek-v4-flash" + assert row.cost_usd == pytest.approx(0.00002) async def test_log_translation_fanout_per_language_failure_isolated(db_factory, monkeypatch):