i18n: localize indicator summaries (per-group + aggregate read)

This commit is contained in:
Giorgio Gilestro 2026-05-27 20:19:47 +02:00
parent 7acd191051
commit 664757ea8a
4 changed files with 173 additions and 12 deletions

View file

@ -0,0 +1,38 @@
"""indicator_summary_translations table.
Revision ID: 0024
Revises: 0023
Create Date: 2026-05-27
"""
from typing import Sequence, Union
import sqlalchemy as sa
from alembic import op
# Alembic revision identifiers: this migration's own id and the revision it
# is applied on top of.
revision: str = "0024"
down_revision: Union[str, None] = "0023"
# No branch labels or cross-revision dependencies are declared.
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None
def upgrade() -> None:
    """Create the ``indicator_summary_translations`` table.

    Each row stores one translated rendering (``content_md``) of an
    ``indicator_summaries`` row for a single language code, together with
    the generation timestamp and optional LLM model / cost bookkeeping.
    """
    columns = [
        sa.Column("id", sa.BigInteger(), primary_key=True, autoincrement=True),
        sa.Column("summary_id", sa.BigInteger(), nullable=False),
        sa.Column("lang", sa.String(length=8), nullable=False),
        sa.Column("content_md", sa.Text(), nullable=False),
        sa.Column("generated_at", sa.DateTime(timezone=True), nullable=False),
        sa.Column("llm_model", sa.String(length=64), nullable=True),
        sa.Column("llm_cost_usd", sa.Float(), nullable=True),
    ]
    constraints = [
        # Translations are removed together with their source summary row.
        sa.ForeignKeyConstraint(
            ["summary_id"],
            ["indicator_summaries.id"],
            ondelete="CASCADE",
            name="fk_ist_summary",
        ),
        # At most one translation per (summary, language) pair.
        sa.UniqueConstraint("summary_id", "lang", name="uq_ist_summary_lang"),
    ]
    op.create_table("indicator_summary_translations", *columns, *constraints)
def downgrade() -> None:
    """Drop the ``indicator_summary_translations`` table."""
    table_name = "indicator_summary_translations"
    op.drop_table(table_name)

View file

@ -13,8 +13,15 @@ from app.config import get_settings, load_groups
from app.db import utcnow
from app.jobs._helpers import job_lifecycle, log
from app.jobs._market_context import latest_quotes_by_group, month_spend
from app.models import AICall, IndicatorSummary, JobRun
from app.models import (
AICall,
IndicatorSummary,
IndicatorSummaryTranslation,
JobRun,
User,
)
from app.services.cadence import DEFAULT_POLICY
from app.services.i18n import ACTIVE_LANGUAGES
from app.services.openrouter import (
PROMPT_VERSION,
active_model,
@ -26,11 +33,58 @@ from app.services.openrouter import (
llm_configured,
month_start,
)
from app.services.translation import translate
# Reserved group_name under which the cross-group aggregate summary row is
# stored, alongside the per-group rows.
AGGREGATE_GROUP_NAME = "__all__"
async def translate_summary_for_active_languages(session, summary_id: int) -> None:
    """Fan out per-language translations for one IndicatorSummary row.

    Mirrors ``ai_log_job.translate_log_for_active_languages``: reads the
    distinct non-en ``users.lang`` set, translates the English content
    once per active language in parallel via ``asyncio.gather``, and
    persists each result as an ``IndicatorSummaryTranslation`` row.

    Per-language failures (including a cancelled translation task) are
    logged and skipped; they never raise out of this function.

    Args:
        session: Async DB session used both for the lookups and for
            persisting the translation rows; it is committed before
            returning once the translation fan-out has run.
        summary_id: Primary key of the already-committed English
            ``IndicatorSummary`` row to translate.
    """
    target_langs = sorted({code for code in ACTIVE_LANGUAGES if code != "en"})
    if not target_langs:
        return

    # Only translate into languages that at least one user has selected.
    active_langs = (await session.execute(
        select(User.lang).distinct().where(User.lang.in_(target_langs))
    )).scalars().all()
    if not active_langs:
        return

    summary_row = await session.get(IndicatorSummary, summary_id)
    if summary_row is None:
        log.warning("ind_summary.translate.missing_summary", summary_id=summary_id)
        return

    source_md = summary_row.content
    async with httpx.AsyncClient(follow_redirects=True, timeout=60) as client:
        results = await asyncio.gather(
            *(translate(client, source_md, lang) for lang in active_langs),
            return_exceptions=True,
        )

    for lang, result in zip(active_langs, results):
        # With return_exceptions=True gather may hand back any BaseException
        # (e.g. asyncio.CancelledError, which is not an Exception subclass);
        # checking only Exception would let such a result reach the tuple
        # unpacking below and raise TypeError.
        if isinstance(result, BaseException):
            log.warning("ind_summary.translate.failed",
                        lang=lang, summary_id=summary_id,
                        error=str(result)[:200])
            continue
        translated_md, llm_result = result
        session.add(IndicatorSummaryTranslation(
            summary_id=summary_id, lang=lang,
            content_md=translated_md,
            generated_at=utcnow(),
            llm_model=llm_result.model,
            llm_cost_usd=llm_result.cost_usd,
        ))
    await session.commit()
# Strip known meta-commentary openers the model sometimes leaks despite the
# prompt's hard constraints. Each pattern matches one leading sentence.
_LEAK_PATTERNS = [
@ -140,8 +194,10 @@ def clean_summary(text: str) -> str:
async def _generate_one(
session, client: httpx.AsyncClient, group: str, quotes: list[dict],
system_prompt: str, model: str, tone: str, analysis: str,
) -> bool:
"""Generate + persist one group's summary. Returns True on success.
) -> IndicatorSummary | None:
"""Generate + persist one group's summary. Returns the new row on
success (so the caller can fan out localized translations after
the commit picks up its id) or None on failure.
`model` is retained for ledger labelling but call_llm now picks the
active-provider model itself."""
user_prompt = build_summary_user_prompt(group, quotes)
@ -155,7 +211,7 @@ async def _generate_one(
except Exception as e:
session.add(AICall(model=active_model(), status="error", error=str(e)[:500]))
log.warning("ind_summary.failed", group=group, error=str(e)[:120])
return False
return None
cleaned = clean_summary(result.content)
if looks_like_leakage(cleaned) or len(cleaned) < 40:
@ -171,9 +227,9 @@ async def _generate_one(
cost_usd=result.cost_usd,
status="leaked",
))
return False
return None
session.add(IndicatorSummary(
summary = IndicatorSummary(
group_name=group,
generated_at=utcnow(),
model=result.model,
@ -184,7 +240,8 @@ async def _generate_one(
prompt_tokens=result.prompt_tokens,
completion_tokens=result.completion_tokens,
cost_usd=result.cost_usd,
))
)
session.add(summary)
session.add(AICall(
model=result.model,
prompt_tokens=result.prompt_tokens,
@ -192,7 +249,7 @@ async def _generate_one(
cost_usd=result.cost_usd,
status="ok",
))
return True
return summary
async def run() -> None:
@ -249,17 +306,20 @@ async def run() -> None:
for tone in tones:
system_prompt = build_summary_system_prompt(tone, analysis)
for group, quotes in groups.items():
ok = await _generate_one(
summary = await _generate_one(
session, client, group, quotes,
system_prompt, active_model(), tone, analysis,
)
if ok:
if summary is not None:
written += 1
await session.commit() # partial progress survives mid-job error
if summary is not None:
await translate_summary_for_active_languages(session, summary.id)
# One aggregate read across all groups, stored under __all__.
agg_system = build_aggregate_summary_system_prompt(tone, analysis)
agg_user = build_aggregate_summary_user_prompt(groups)
agg_summary: IndicatorSummary | None = None
try:
result = await call_llm(
client,
@ -267,7 +327,7 @@ async def run() -> None:
{"role": "user", "content": agg_user}],
max_tokens=1500, # room for reasoning + 80-word output
)
session.add(IndicatorSummary(
agg_summary = IndicatorSummary(
group_name=AGGREGATE_GROUP_NAME,
generated_at=utcnow(),
model=result.model,
@ -278,7 +338,8 @@ async def run() -> None:
prompt_tokens=result.prompt_tokens,
completion_tokens=result.completion_tokens,
cost_usd=result.cost_usd,
))
)
session.add(agg_summary)
session.add(AICall(
model=result.model,
prompt_tokens=result.prompt_tokens,
@ -294,6 +355,8 @@ async def run() -> None:
log.warning("ind_summary.agg_failed",
tone=tone, error=str(e)[:120])
await session.commit()
if agg_summary is not None:
await translate_summary_for_active_languages(session, agg_summary.id)
jr.items_written = written
log.info("ind_summary.done",

View file

@ -172,6 +172,37 @@ class IndicatorSummary(Base):
__table_args__ = (Index("ix_indsumm_group_generated", "group_name", "generated_at"),)
class IndicatorSummaryTranslation(Base):
    """One localized rendering of an IndicatorSummary row.

    Follows the StrategicLogTranslation pattern: a single row per
    (summary_id, lang) pair. Rows are written by indicator_summary_job
    once the English summary has been committed. The dashboard and
    indicators endpoints substitute the matching translation for users
    whose lang preference is not "en", and silently keep the English
    source when no translation row exists yet.
    """

    __tablename__ = "indicator_summary_translations"
    # At most one cached translation per summary and language.
    __table_args__ = (
        UniqueConstraint("summary_id", "lang", name="uq_ist_summary_lang"),
    )

    id: Mapped[int] = mapped_column(_PK, primary_key=True, autoincrement=True)
    # Source summary; translations are deleted together with it.
    summary_id: Mapped[int] = mapped_column(
        BigInteger().with_variant(Integer(), "sqlite"),
        ForeignKey("indicator_summaries.id", ondelete="CASCADE"),
        nullable=False,
    )
    lang: Mapped[str] = mapped_column(String(8), nullable=False)
    content_md: Mapped[str] = mapped_column(Text, nullable=False)
    generated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
        default=utcnow,
    )
    # Optional bookkeeping about the LLM call that produced the translation.
    llm_model: Mapped[str | None] = mapped_column(String(64))
    llm_cost_usd: Mapped[float | None] = mapped_column(Float)
class AICall(Base):
"""Cost ledger for OpenRouter calls. Feeds the monthly cap check."""
__tablename__ = "ai_calls"

View file

@ -36,6 +36,7 @@ from app.models import (
AICall,
Headline,
IndicatorSummary,
IndicatorSummaryTranslation,
JobRun,
Quote,
StrategicLog,
@ -138,6 +139,7 @@ async def indicators(
as_: str | None = Query(default=None, alias="as"),
tone: str | None = Query(default=None),
session: AsyncSession = Depends(get_session),
principal: CurrentUser | None = Depends(maybe_current_user),
):
sub = (
select(Quote.symbol, func.max(Quote.fetched_at).label("mx"))
@ -205,6 +207,7 @@ async def indicators(
if as_of_d and (today - as_of_d).days > threshold:
stale_symbols.add(r.symbol)
await _apply_localized_summary(session, summary, principal)
return templates.TemplateResponse(
request, "partials/indicators.html",
{"quotes": rows, "has_anchor": has_anchor,
@ -340,6 +343,30 @@ async def _localized_content(
return t.content_md if t is not None else None
async def _apply_localized_summary(
session: AsyncSession,
row: IndicatorSummary | None,
principal: CurrentUser | None,
) -> None:
"""If ``row`` has a matching translation for ``principal.user.lang``,
overwrite the in-memory ``content`` attribute so the template renders
the localized version. No DB write happens the mutation lives only
for the lifetime of this GET request.
"""
if row is None or principal is None or principal.user is None:
return
lang = (principal.user.lang or "en")
if lang == "en":
return
t = (await session.execute(
select(IndicatorSummaryTranslation)
.where(IndicatorSummaryTranslation.summary_id == row.id)
.where(IndicatorSummaryTranslation.lang == lang)
)).scalar_one_or_none()
if t is not None:
row.content = t.content_md
def _resolve_tone_param(tone: str | None) -> str:
"""Normalise a query-param tone to one of the two valid values.
PRO is silently mapped to INTERMEDIATE (see openrouter.PROMPT_VERSION 6)."""
@ -552,6 +579,7 @@ async def aggregate_summary(
session: AsyncSession = Depends(get_session),
as_: str | None = Query(default=None, alias="as"),
tone: str | None = Query(default=None),
principal: CurrentUser | None = Depends(maybe_current_user),
):
wanted_tone = _resolve_tone_param(tone)
row = (await session.execute(
@ -573,6 +601,7 @@ async def aggregate_summary(
statuses = all_statuses()
if as_ == "html":
await _apply_localized_summary(session, row, principal)
return templates.TemplateResponse(
request, "partials/dashboard_header.html",
{"summary": row, "markets": statuses, "tone": wanted_tone},