models: align translation column naming + add token counts
Three recently-added tables (strategic_log_translations, indicator_summary_translations, csv_format_templates) drifted from the codebase's existing naming convention. This commit renames:
- llm_model -> model
- llm_cost_usd -> cost_usd
- content_md -> content (on the two translation tables only; csv_format_templates has no content field)
It also adds prompt_tokens and completion_tokens to all three tables; these values were silently dropped at write time even though LogResult exposes them. All writer call sites (ai_log_job, indicator_summary_job, llm_csv_parser) and reader call sites (the localized helpers in api.py) are updated to match, and the tests are realigned. Migration 0025 uses batch_alter_table for SQLite compatibility. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
e4dc6d0071
commit
a6d686324c
8 changed files with 125 additions and 32 deletions
79
alembic/versions/0025_align_translation_columns.py
Normal file
79
alembic/versions/0025_align_translation_columns.py
Normal file
|
|
@ -0,0 +1,79 @@
|
|||
"""align translation column naming + add token counts.
|
||||
|
||||
Revision ID: 0025
|
||||
Revises: 0024
|
||||
Create Date: 2026-05-27
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
import sqlalchemy as sa
|
||||
from alembic import op
|
||||
|
||||
|
||||
# Alembic revision identifiers: this migration is 0025 and applies on top of 0024.
revision: str = "0025"
|
||||
down_revision: Union[str, None] = "0024"
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Rename the LLM bookkeeping columns and add token-count columns.

    For each of the three tables, inside one ``batch_alter_table`` context:

    * ``llm_model``    -> ``model``     (String(64), nullable)
    * ``llm_cost_usd`` -> ``cost_usd``  (Float, nullable)
    * ``content_md``   -> ``content``   (Text, NOT NULL) -- translation
      tables only; ``csv_format_templates`` is not given this rename.
    * add nullable Integer columns ``prompt_tokens`` and
      ``completion_tokens``.
    """
    # (table name, whether the table also gets the content_md -> content rename)
    targets = (
        ("strategic_log_translations", True),
        ("indicator_summary_translations", True),
        ("csv_format_templates", False),
    )

    for table_name, renames_content in targets:
        # Batch mode is used for every table so the same migration also works
        # on SQLite (per the commit description).
        with op.batch_alter_table(table_name) as batch:
            batch.alter_column(
                "llm_model",
                new_column_name="model",
                existing_type=sa.String(length=64),
                existing_nullable=True,
            )
            batch.alter_column(
                "llm_cost_usd",
                new_column_name="cost_usd",
                existing_type=sa.Float(),
                existing_nullable=True,
            )
            if renames_content:
                batch.alter_column(
                    "content_md",
                    new_column_name="content",
                    existing_type=sa.Text(),
                    existing_nullable=False,
                )
            # New columns are nullable, so no value is required for existing rows.
            for token_column in ("prompt_tokens", "completion_tokens"):
                batch.add_column(
                    sa.Column(token_column, sa.Integer(), nullable=True)
                )
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Reverse ``upgrade``: drop the token columns and restore the old names.

    Tables are processed in the opposite order to ``upgrade``. For each table,
    inside one ``batch_alter_table`` context:

    * drop ``completion_tokens`` and ``prompt_tokens`` (their data is lost);
    * ``content``  -> ``content_md``   (Text, NOT NULL) -- translation
      tables only;
    * ``cost_usd`` -> ``llm_cost_usd`` (Float, nullable)
    * ``model``    -> ``llm_model``    (String(64), nullable)
    """
    # (table name, whether the table also gets the content -> content_md rename)
    targets = (
        ("csv_format_templates", False),
        ("indicator_summary_translations", True),
        ("strategic_log_translations", True),
    )

    for table_name, restores_content in targets:
        with op.batch_alter_table(table_name) as batch:
            # Remove the columns added by upgrade(), newest first.
            for token_column in ("completion_tokens", "prompt_tokens"):
                batch.drop_column(token_column)
            if restores_content:
                batch.alter_column(
                    "content",
                    new_column_name="content_md",
                    existing_type=sa.Text(),
                    existing_nullable=False,
                )
            batch.alter_column(
                "cost_usd",
                new_column_name="llm_cost_usd",
                existing_type=sa.Float(),
                existing_nullable=True,
            )
            batch.alter_column(
                "model",
                new_column_name="llm_model",
                existing_type=sa.String(length=64),
                existing_nullable=True,
            )
|
||||
|
|
@ -74,10 +74,12 @@ async def translate_log_for_active_languages(session, log_id: int) -> None:
|
|||
translated_md, llm_result = result
|
||||
session.add(StrategicLogTranslation(
|
||||
log_id=log_id, lang=lang,
|
||||
content_md=translated_md,
|
||||
content=translated_md,
|
||||
generated_at=utcnow(),
|
||||
llm_model=llm_result.model,
|
||||
llm_cost_usd=llm_result.cost_usd,
|
||||
model=llm_result.model,
|
||||
prompt_tokens=llm_result.prompt_tokens,
|
||||
completion_tokens=llm_result.completion_tokens,
|
||||
cost_usd=llm_result.cost_usd,
|
||||
))
|
||||
await session.commit()
|
||||
|
||||
|
|
|
|||
|
|
@ -77,10 +77,12 @@ async def translate_summary_for_active_languages(session, summary_id: int) -> No
|
|||
translated_md, llm_result = result
|
||||
session.add(IndicatorSummaryTranslation(
|
||||
summary_id=summary_id, lang=lang,
|
||||
content_md=translated_md,
|
||||
content=translated_md,
|
||||
generated_at=utcnow(),
|
||||
llm_model=llm_result.model,
|
||||
llm_cost_usd=llm_result.cost_usd,
|
||||
model=llm_result.model,
|
||||
prompt_tokens=llm_result.prompt_tokens,
|
||||
completion_tokens=llm_result.completion_tokens,
|
||||
cost_usd=llm_result.cost_usd,
|
||||
))
|
||||
await session.commit()
|
||||
|
||||
|
|
|
|||
|
|
@ -141,12 +141,14 @@ class StrategicLogTranslation(Base):
|
|||
nullable=False,
|
||||
)
|
||||
lang: Mapped[str] = mapped_column(String(8), nullable=False)
|
||||
content_md: Mapped[str] = mapped_column(Text, nullable=False)
|
||||
content: Mapped[str] = mapped_column(Text, nullable=False)
|
||||
generated_at: Mapped[datetime] = mapped_column(
|
||||
DateTime(timezone=True), nullable=False, default=utcnow,
|
||||
)
|
||||
llm_model: Mapped[str | None] = mapped_column(String(64))
|
||||
llm_cost_usd: Mapped[float | None] = mapped_column(Float)
|
||||
model: Mapped[str | None] = mapped_column(String(64))
|
||||
prompt_tokens: Mapped[int | None] = mapped_column(Integer)
|
||||
completion_tokens: Mapped[int | None] = mapped_column(Integer)
|
||||
cost_usd: Mapped[float | None] = mapped_column(Float)
|
||||
|
||||
__table_args__ = (
|
||||
UniqueConstraint("log_id", "lang", name="uq_slt_log_lang"),
|
||||
|
|
@ -191,12 +193,14 @@ class IndicatorSummaryTranslation(Base):
|
|||
nullable=False,
|
||||
)
|
||||
lang: Mapped[str] = mapped_column(String(8), nullable=False)
|
||||
content_md: Mapped[str] = mapped_column(Text, nullable=False)
|
||||
content: Mapped[str] = mapped_column(Text, nullable=False)
|
||||
generated_at: Mapped[datetime] = mapped_column(
|
||||
DateTime(timezone=True), nullable=False, default=utcnow,
|
||||
)
|
||||
llm_model: Mapped[str | None] = mapped_column(String(64))
|
||||
llm_cost_usd: Mapped[float | None] = mapped_column(Float)
|
||||
model: Mapped[str | None] = mapped_column(String(64))
|
||||
prompt_tokens: Mapped[int | None] = mapped_column(Integer)
|
||||
completion_tokens: Mapped[int | None] = mapped_column(Integer)
|
||||
cost_usd: Mapped[float | None] = mapped_column(Float)
|
||||
|
||||
__table_args__ = (
|
||||
UniqueConstraint("summary_id", "lang", name="uq_ist_summary_lang"),
|
||||
|
|
@ -535,5 +539,7 @@ class CsvFormatTemplate(Base):
|
|||
last_used_at: Mapped[datetime] = mapped_column(
|
||||
DateTime(timezone=True), nullable=False, default=utcnow,
|
||||
)
|
||||
llm_model: Mapped[str | None] = mapped_column(String(64))
|
||||
llm_cost_usd: Mapped[float | None] = mapped_column(Float)
|
||||
model: Mapped[str | None] = mapped_column(String(64))
|
||||
prompt_tokens: Mapped[int | None] = mapped_column(Integer)
|
||||
completion_tokens: Mapped[int | None] = mapped_column(Integer)
|
||||
cost_usd: Mapped[float | None] = mapped_column(Float)
|
||||
|
|
|
|||
|
|
@ -326,7 +326,7 @@ async def _localized_content(
|
|||
row: StrategicLog | None,
|
||||
principal: CurrentUser | None,
|
||||
) -> str | None:
|
||||
"""Return the translated content_md for ``row`` when the principal has
|
||||
"""Return the translated content for ``row`` when the principal has
|
||||
a non-English lang preference and a matching translation row exists.
|
||||
Returns None to signal 'use row.content as-is' (the default English
|
||||
path)."""
|
||||
|
|
@ -340,7 +340,7 @@ async def _localized_content(
|
|||
.where(StrategicLogTranslation.log_id == row.id)
|
||||
.where(StrategicLogTranslation.lang == lang)
|
||||
)).scalar_one_or_none()
|
||||
return t.content_md if t is not None else None
|
||||
return t.content if t is not None else None
|
||||
|
||||
|
||||
async def _apply_localized_summary(
|
||||
|
|
@ -364,7 +364,7 @@ async def _apply_localized_summary(
|
|||
.where(IndicatorSummaryTranslation.lang == lang)
|
||||
)).scalar_one_or_none()
|
||||
if t is not None:
|
||||
row.content = t.content_md
|
||||
row.content = t.content
|
||||
|
||||
|
||||
def _resolve_tone_param(tone: str | None) -> str:
|
||||
|
|
|
|||
|
|
@ -424,8 +424,10 @@ async def parse_with_llm(raw: bytes, session: AsyncSession) -> ParsedPie:
|
|||
first_seen_at=now,
|
||||
last_used_at=now,
|
||||
use_count=1,
|
||||
llm_model=llm_log.model,
|
||||
llm_cost_usd=llm_log.cost_usd,
|
||||
model=llm_log.model,
|
||||
prompt_tokens=llm_log.prompt_tokens,
|
||||
completion_tokens=llm_log.completion_tokens,
|
||||
cost_usd=llm_log.cost_usd,
|
||||
))
|
||||
await session.commit()
|
||||
return pie
|
||||
|
|
|
|||
|
|
@ -22,8 +22,10 @@ def test_csv_format_template_model_columns():
|
|||
assert "first_seen_at" in cols
|
||||
assert "use_count" in cols
|
||||
assert "last_used_at" in cols
|
||||
assert "llm_model" in cols
|
||||
assert "llm_cost_usd" in cols
|
||||
assert "model" in cols
|
||||
assert "cost_usd" in cols
|
||||
assert "prompt_tokens" in cols
|
||||
assert "completion_tokens" in cols
|
||||
# Crucially, no user attribution.
|
||||
assert "user_id" not in cols
|
||||
assert "first_seen_user_id" not in cols
|
||||
|
|
@ -330,7 +332,7 @@ async def test_parse_with_llm_cache_miss_inserts_template(db_factory):
|
|||
assert tmpl.mapping["ticker_col"] == "Symbol"
|
||||
assert tmpl.broker_label == "Generic broker"
|
||||
assert tmpl.use_count == 1
|
||||
assert tmpl.llm_cost_usd == pytest.approx(0.0002)
|
||||
assert tmpl.cost_usd == pytest.approx(0.0002)
|
||||
# The crucial PII guarantee:
|
||||
assert not hasattr(tmpl, "user_id"), "sample row must not be linked to a user"
|
||||
|
||||
|
|
@ -365,8 +367,8 @@ async def test_parse_with_llm_cache_hit_skips_llm(db_factory):
|
|||
first_seen_at=utcnow(),
|
||||
last_used_at=utcnow(),
|
||||
use_count=1,
|
||||
llm_model="seed",
|
||||
llm_cost_usd=0.0,
|
||||
model="seed",
|
||||
cost_usd=0.0,
|
||||
))
|
||||
await session.commit()
|
||||
|
||||
|
|
@ -410,7 +412,7 @@ async def test_parse_with_llm_stale_mapping_raises_but_does_not_evict(db_factory
|
|||
mapping={"ticker_col": "Symbol", "qty_col": "Symbol"},
|
||||
preamble_rows=0, delimiter=",", broker_label=None,
|
||||
first_seen_at=utcnow(), last_used_at=utcnow(), use_count=1,
|
||||
llm_model="seed", llm_cost_usd=0.0,
|
||||
model="seed", cost_usd=0.0,
|
||||
))
|
||||
await session.commit()
|
||||
|
||||
|
|
|
|||
|
|
@ -27,13 +27,13 @@ def test_strategic_log_translation_model_columns():
|
|||
cols = {c.name: c for c in inspect(StrategicLogTranslation).columns}
|
||||
assert "log_id" in cols
|
||||
assert "lang" in cols
|
||||
assert "content_md" in cols
|
||||
assert "content" in cols
|
||||
assert "generated_at" in cols
|
||||
assert "llm_model" in cols
|
||||
assert "llm_cost_usd" in cols
|
||||
assert "model" in cols
|
||||
assert "cost_usd" in cols
|
||||
assert cols["log_id"].nullable is False
|
||||
assert cols["lang"].nullable is False
|
||||
assert cols["content_md"].nullable is False
|
||||
assert cols["content"].nullable is False
|
||||
|
||||
|
||||
async def test_log_translation_fanout_no_active_non_en_users(db_factory, monkeypatch):
|
||||
|
|
@ -113,9 +113,9 @@ async def test_log_translation_fanout_italian_user(db_factory, monkeypatch):
|
|||
row = rows[0]
|
||||
assert row.log_id == log_id
|
||||
assert row.lang == "it"
|
||||
assert row.content_md.startswith("# Apertura")
|
||||
assert row.llm_model == "deepseek/deepseek-v4-flash"
|
||||
assert row.llm_cost_usd == pytest.approx(0.00002)
|
||||
assert row.content.startswith("# Apertura")
|
||||
assert row.model == "deepseek/deepseek-v4-flash"
|
||||
assert row.cost_usd == pytest.approx(0.00002)
|
||||
|
||||
|
||||
async def test_log_translation_fanout_per_language_failure_isolated(db_factory, monkeypatch):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue