models: align translation column naming + add token counts
Three recently added tables (strategic_log_translations, indicator_summary_translations, csv_format_templates) had drifted from the codebase's existing naming convention. The following columns are renamed:

- llm_model -> model
- llm_cost_usd -> cost_usd
- content_md -> content (on the two translation tables only; csv_format_templates has no content field)

Also added prompt_tokens and completion_tokens to all three tables; these values were silently dropped at write time even though LogResult exposes them.

All writer call sites (ai_log_job, indicator_summary_job, llm_csv_parser) and reader call sites (the localized helpers in api.py) are updated to match, and the tests are realigned. Migration 0025 uses batch_alter_table for SQLite compatibility.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
e4dc6d0071
commit
a6d686324c
8 changed files with 125 additions and 32 deletions
79
alembic/versions/0025_align_translation_columns.py
Normal file
79
alembic/versions/0025_align_translation_columns.py
Normal file
|
|
@ -0,0 +1,79 @@
|
||||||
|
"""align translation column naming + add token counts.
|
||||||
|
|
||||||
|
Revision ID: 0025
|
||||||
|
Revises: 0024
|
||||||
|
Create Date: 2026-05-27
|
||||||
|
"""
|
||||||
|
from typing import Sequence, Union
|
||||||
|
|
||||||
|
import sqlalchemy as sa
|
||||||
|
from alembic import op
|
||||||
|
|
||||||
|
|
||||||
|
revision: str = "0025"
|
||||||
|
down_revision: Union[str, None] = "0024"
|
||||||
|
branch_labels: Union[str, Sequence[str], None] = None
|
||||||
|
depends_on: Union[str, Sequence[str], None] = None
|
||||||
|
|
||||||
|
|
||||||
|
def upgrade() -> None:
    """Apply revision 0025: align LLM metadata column names and add token counts.

    For each of the three affected tables this renames ``llm_model`` to
    ``model`` and ``llm_cost_usd`` to ``cost_usd``, then adds the nullable
    integer columns ``prompt_tokens`` and ``completion_tokens``. The two
    translation tables additionally get ``content_md`` renamed to ``content``.

    Every change goes through ``op.batch_alter_table`` so the migration also
    runs on SQLite.
    """
    # (table name, whether the table carries a ``content_md`` column to rename)
    targets = (
        ("strategic_log_translations", True),
        ("indicator_summary_translations", True),
        ("csv_format_templates", False),
    )

    for table_name, has_content_column in targets:
        with op.batch_alter_table(table_name) as batch:
            batch.alter_column(
                "llm_model",
                new_column_name="model",
                existing_type=sa.String(length=64),
                existing_nullable=True,
            )
            batch.alter_column(
                "llm_cost_usd",
                new_column_name="cost_usd",
                existing_type=sa.Float(),
                existing_nullable=True,
            )
            if has_content_column:
                batch.alter_column(
                    "content_md",
                    new_column_name="content",
                    existing_type=sa.Text(),
                    existing_nullable=False,
                )
            # Nullable so that rows written before this revision stay valid.
            for token_column in ("prompt_tokens", "completion_tokens"):
                batch.add_column(
                    sa.Column(token_column, sa.Integer(), nullable=True)
                )
|
||||||
|
|
||||||
|
|
||||||
|
def downgrade() -> None:
    """Revert revision 0025, restoring the previous column names.

    Tables are processed in the reverse order of ``upgrade``. For each one the
    ``completion_tokens`` and ``prompt_tokens`` columns are dropped (any values
    stored in them are lost), then ``cost_usd`` and ``model`` are renamed back
    to ``llm_cost_usd`` and ``llm_model``. The two translation tables also get
    ``content`` renamed back to ``content_md``.
    """
    # (table name, whether the table carries a ``content`` column to rename back)
    targets = (
        ("csv_format_templates", False),
        ("indicator_summary_translations", True),
        ("strategic_log_translations", True),
    )

    for table_name, has_content_column in targets:
        with op.batch_alter_table(table_name) as batch:
            for token_column in ("completion_tokens", "prompt_tokens"):
                batch.drop_column(token_column)
            if has_content_column:
                batch.alter_column(
                    "content",
                    new_column_name="content_md",
                    existing_type=sa.Text(),
                    existing_nullable=False,
                )
            batch.alter_column(
                "cost_usd",
                new_column_name="llm_cost_usd",
                existing_type=sa.Float(),
                existing_nullable=True,
            )
            batch.alter_column(
                "model",
                new_column_name="llm_model",
                existing_type=sa.String(length=64),
                existing_nullable=True,
            )
|
||||||
|
|
@ -74,10 +74,12 @@ async def translate_log_for_active_languages(session, log_id: int) -> None:
|
||||||
translated_md, llm_result = result
|
translated_md, llm_result = result
|
||||||
session.add(StrategicLogTranslation(
|
session.add(StrategicLogTranslation(
|
||||||
log_id=log_id, lang=lang,
|
log_id=log_id, lang=lang,
|
||||||
content_md=translated_md,
|
content=translated_md,
|
||||||
generated_at=utcnow(),
|
generated_at=utcnow(),
|
||||||
llm_model=llm_result.model,
|
model=llm_result.model,
|
||||||
llm_cost_usd=llm_result.cost_usd,
|
prompt_tokens=llm_result.prompt_tokens,
|
||||||
|
completion_tokens=llm_result.completion_tokens,
|
||||||
|
cost_usd=llm_result.cost_usd,
|
||||||
))
|
))
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -77,10 +77,12 @@ async def translate_summary_for_active_languages(session, summary_id: int) -> No
|
||||||
translated_md, llm_result = result
|
translated_md, llm_result = result
|
||||||
session.add(IndicatorSummaryTranslation(
|
session.add(IndicatorSummaryTranslation(
|
||||||
summary_id=summary_id, lang=lang,
|
summary_id=summary_id, lang=lang,
|
||||||
content_md=translated_md,
|
content=translated_md,
|
||||||
generated_at=utcnow(),
|
generated_at=utcnow(),
|
||||||
llm_model=llm_result.model,
|
model=llm_result.model,
|
||||||
llm_cost_usd=llm_result.cost_usd,
|
prompt_tokens=llm_result.prompt_tokens,
|
||||||
|
completion_tokens=llm_result.completion_tokens,
|
||||||
|
cost_usd=llm_result.cost_usd,
|
||||||
))
|
))
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -141,12 +141,14 @@ class StrategicLogTranslation(Base):
|
||||||
nullable=False,
|
nullable=False,
|
||||||
)
|
)
|
||||||
lang: Mapped[str] = mapped_column(String(8), nullable=False)
|
lang: Mapped[str] = mapped_column(String(8), nullable=False)
|
||||||
content_md: Mapped[str] = mapped_column(Text, nullable=False)
|
content: Mapped[str] = mapped_column(Text, nullable=False)
|
||||||
generated_at: Mapped[datetime] = mapped_column(
|
generated_at: Mapped[datetime] = mapped_column(
|
||||||
DateTime(timezone=True), nullable=False, default=utcnow,
|
DateTime(timezone=True), nullable=False, default=utcnow,
|
||||||
)
|
)
|
||||||
llm_model: Mapped[str | None] = mapped_column(String(64))
|
model: Mapped[str | None] = mapped_column(String(64))
|
||||||
llm_cost_usd: Mapped[float | None] = mapped_column(Float)
|
prompt_tokens: Mapped[int | None] = mapped_column(Integer)
|
||||||
|
completion_tokens: Mapped[int | None] = mapped_column(Integer)
|
||||||
|
cost_usd: Mapped[float | None] = mapped_column(Float)
|
||||||
|
|
||||||
__table_args__ = (
|
__table_args__ = (
|
||||||
UniqueConstraint("log_id", "lang", name="uq_slt_log_lang"),
|
UniqueConstraint("log_id", "lang", name="uq_slt_log_lang"),
|
||||||
|
|
@ -191,12 +193,14 @@ class IndicatorSummaryTranslation(Base):
|
||||||
nullable=False,
|
nullable=False,
|
||||||
)
|
)
|
||||||
lang: Mapped[str] = mapped_column(String(8), nullable=False)
|
lang: Mapped[str] = mapped_column(String(8), nullable=False)
|
||||||
content_md: Mapped[str] = mapped_column(Text, nullable=False)
|
content: Mapped[str] = mapped_column(Text, nullable=False)
|
||||||
generated_at: Mapped[datetime] = mapped_column(
|
generated_at: Mapped[datetime] = mapped_column(
|
||||||
DateTime(timezone=True), nullable=False, default=utcnow,
|
DateTime(timezone=True), nullable=False, default=utcnow,
|
||||||
)
|
)
|
||||||
llm_model: Mapped[str | None] = mapped_column(String(64))
|
model: Mapped[str | None] = mapped_column(String(64))
|
||||||
llm_cost_usd: Mapped[float | None] = mapped_column(Float)
|
prompt_tokens: Mapped[int | None] = mapped_column(Integer)
|
||||||
|
completion_tokens: Mapped[int | None] = mapped_column(Integer)
|
||||||
|
cost_usd: Mapped[float | None] = mapped_column(Float)
|
||||||
|
|
||||||
__table_args__ = (
|
__table_args__ = (
|
||||||
UniqueConstraint("summary_id", "lang", name="uq_ist_summary_lang"),
|
UniqueConstraint("summary_id", "lang", name="uq_ist_summary_lang"),
|
||||||
|
|
@ -535,5 +539,7 @@ class CsvFormatTemplate(Base):
|
||||||
last_used_at: Mapped[datetime] = mapped_column(
|
last_used_at: Mapped[datetime] = mapped_column(
|
||||||
DateTime(timezone=True), nullable=False, default=utcnow,
|
DateTime(timezone=True), nullable=False, default=utcnow,
|
||||||
)
|
)
|
||||||
llm_model: Mapped[str | None] = mapped_column(String(64))
|
model: Mapped[str | None] = mapped_column(String(64))
|
||||||
llm_cost_usd: Mapped[float | None] = mapped_column(Float)
|
prompt_tokens: Mapped[int | None] = mapped_column(Integer)
|
||||||
|
completion_tokens: Mapped[int | None] = mapped_column(Integer)
|
||||||
|
cost_usd: Mapped[float | None] = mapped_column(Float)
|
||||||
|
|
|
||||||
|
|
@ -326,7 +326,7 @@ async def _localized_content(
|
||||||
row: StrategicLog | None,
|
row: StrategicLog | None,
|
||||||
principal: CurrentUser | None,
|
principal: CurrentUser | None,
|
||||||
) -> str | None:
|
) -> str | None:
|
||||||
"""Return the translated content_md for ``row`` when the principal has
|
"""Return the translated content for ``row`` when the principal has
|
||||||
a non-English lang preference and a matching translation row exists.
|
a non-English lang preference and a matching translation row exists.
|
||||||
Returns None to signal 'use row.content as-is' (the default English
|
Returns None to signal 'use row.content as-is' (the default English
|
||||||
path)."""
|
path)."""
|
||||||
|
|
@ -340,7 +340,7 @@ async def _localized_content(
|
||||||
.where(StrategicLogTranslation.log_id == row.id)
|
.where(StrategicLogTranslation.log_id == row.id)
|
||||||
.where(StrategicLogTranslation.lang == lang)
|
.where(StrategicLogTranslation.lang == lang)
|
||||||
)).scalar_one_or_none()
|
)).scalar_one_or_none()
|
||||||
return t.content_md if t is not None else None
|
return t.content if t is not None else None
|
||||||
|
|
||||||
|
|
||||||
async def _apply_localized_summary(
|
async def _apply_localized_summary(
|
||||||
|
|
@ -364,7 +364,7 @@ async def _apply_localized_summary(
|
||||||
.where(IndicatorSummaryTranslation.lang == lang)
|
.where(IndicatorSummaryTranslation.lang == lang)
|
||||||
)).scalar_one_or_none()
|
)).scalar_one_or_none()
|
||||||
if t is not None:
|
if t is not None:
|
||||||
row.content = t.content_md
|
row.content = t.content
|
||||||
|
|
||||||
|
|
||||||
def _resolve_tone_param(tone: str | None) -> str:
|
def _resolve_tone_param(tone: str | None) -> str:
|
||||||
|
|
|
||||||
|
|
@ -424,8 +424,10 @@ async def parse_with_llm(raw: bytes, session: AsyncSession) -> ParsedPie:
|
||||||
first_seen_at=now,
|
first_seen_at=now,
|
||||||
last_used_at=now,
|
last_used_at=now,
|
||||||
use_count=1,
|
use_count=1,
|
||||||
llm_model=llm_log.model,
|
model=llm_log.model,
|
||||||
llm_cost_usd=llm_log.cost_usd,
|
prompt_tokens=llm_log.prompt_tokens,
|
||||||
|
completion_tokens=llm_log.completion_tokens,
|
||||||
|
cost_usd=llm_log.cost_usd,
|
||||||
))
|
))
|
||||||
await session.commit()
|
await session.commit()
|
||||||
return pie
|
return pie
|
||||||
|
|
|
||||||
|
|
@ -22,8 +22,10 @@ def test_csv_format_template_model_columns():
|
||||||
assert "first_seen_at" in cols
|
assert "first_seen_at" in cols
|
||||||
assert "use_count" in cols
|
assert "use_count" in cols
|
||||||
assert "last_used_at" in cols
|
assert "last_used_at" in cols
|
||||||
assert "llm_model" in cols
|
assert "model" in cols
|
||||||
assert "llm_cost_usd" in cols
|
assert "cost_usd" in cols
|
||||||
|
assert "prompt_tokens" in cols
|
||||||
|
assert "completion_tokens" in cols
|
||||||
# Crucially, no user attribution.
|
# Crucially, no user attribution.
|
||||||
assert "user_id" not in cols
|
assert "user_id" not in cols
|
||||||
assert "first_seen_user_id" not in cols
|
assert "first_seen_user_id" not in cols
|
||||||
|
|
@ -330,7 +332,7 @@ async def test_parse_with_llm_cache_miss_inserts_template(db_factory):
|
||||||
assert tmpl.mapping["ticker_col"] == "Symbol"
|
assert tmpl.mapping["ticker_col"] == "Symbol"
|
||||||
assert tmpl.broker_label == "Generic broker"
|
assert tmpl.broker_label == "Generic broker"
|
||||||
assert tmpl.use_count == 1
|
assert tmpl.use_count == 1
|
||||||
assert tmpl.llm_cost_usd == pytest.approx(0.0002)
|
assert tmpl.cost_usd == pytest.approx(0.0002)
|
||||||
# The crucial PII guarantee:
|
# The crucial PII guarantee:
|
||||||
assert not hasattr(tmpl, "user_id"), "sample row must not be linked to a user"
|
assert not hasattr(tmpl, "user_id"), "sample row must not be linked to a user"
|
||||||
|
|
||||||
|
|
@ -365,8 +367,8 @@ async def test_parse_with_llm_cache_hit_skips_llm(db_factory):
|
||||||
first_seen_at=utcnow(),
|
first_seen_at=utcnow(),
|
||||||
last_used_at=utcnow(),
|
last_used_at=utcnow(),
|
||||||
use_count=1,
|
use_count=1,
|
||||||
llm_model="seed",
|
model="seed",
|
||||||
llm_cost_usd=0.0,
|
cost_usd=0.0,
|
||||||
))
|
))
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
|
|
@ -410,7 +412,7 @@ async def test_parse_with_llm_stale_mapping_raises_but_does_not_evict(db_factory
|
||||||
mapping={"ticker_col": "Symbol", "qty_col": "Symbol"},
|
mapping={"ticker_col": "Symbol", "qty_col": "Symbol"},
|
||||||
preamble_rows=0, delimiter=",", broker_label=None,
|
preamble_rows=0, delimiter=",", broker_label=None,
|
||||||
first_seen_at=utcnow(), last_used_at=utcnow(), use_count=1,
|
first_seen_at=utcnow(), last_used_at=utcnow(), use_count=1,
|
||||||
llm_model="seed", llm_cost_usd=0.0,
|
model="seed", cost_usd=0.0,
|
||||||
))
|
))
|
||||||
await session.commit()
|
await session.commit()
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -27,13 +27,13 @@ def test_strategic_log_translation_model_columns():
|
||||||
cols = {c.name: c for c in inspect(StrategicLogTranslation).columns}
|
cols = {c.name: c for c in inspect(StrategicLogTranslation).columns}
|
||||||
assert "log_id" in cols
|
assert "log_id" in cols
|
||||||
assert "lang" in cols
|
assert "lang" in cols
|
||||||
assert "content_md" in cols
|
assert "content" in cols
|
||||||
assert "generated_at" in cols
|
assert "generated_at" in cols
|
||||||
assert "llm_model" in cols
|
assert "model" in cols
|
||||||
assert "llm_cost_usd" in cols
|
assert "cost_usd" in cols
|
||||||
assert cols["log_id"].nullable is False
|
assert cols["log_id"].nullable is False
|
||||||
assert cols["lang"].nullable is False
|
assert cols["lang"].nullable is False
|
||||||
assert cols["content_md"].nullable is False
|
assert cols["content"].nullable is False
|
||||||
|
|
||||||
|
|
||||||
async def test_log_translation_fanout_no_active_non_en_users(db_factory, monkeypatch):
|
async def test_log_translation_fanout_no_active_non_en_users(db_factory, monkeypatch):
|
||||||
|
|
@ -113,9 +113,9 @@ async def test_log_translation_fanout_italian_user(db_factory, monkeypatch):
|
||||||
row = rows[0]
|
row = rows[0]
|
||||||
assert row.log_id == log_id
|
assert row.log_id == log_id
|
||||||
assert row.lang == "it"
|
assert row.lang == "it"
|
||||||
assert row.content_md.startswith("# Apertura")
|
assert row.content.startswith("# Apertura")
|
||||||
assert row.llm_model == "deepseek/deepseek-v4-flash"
|
assert row.model == "deepseek/deepseek-v4-flash"
|
||||||
assert row.llm_cost_usd == pytest.approx(0.00002)
|
assert row.cost_usd == pytest.approx(0.00002)
|
||||||
|
|
||||||
|
|
||||||
async def test_log_translation_fanout_per_language_failure_isolated(db_factory, monkeypatch):
|
async def test_log_translation_fanout_per_language_failure_isolated(db_factory, monkeypatch):
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue