i18n: localize indicator summaries (per-group + aggregate read)

This commit is contained in:
Giorgio Gilestro 2026-05-27 20:19:47 +02:00
parent 7acd191051
commit 664757ea8a
4 changed files with 173 additions and 12 deletions

View file

@ -13,8 +13,15 @@ from app.config import get_settings, load_groups
from app.db import utcnow
from app.jobs._helpers import job_lifecycle, log
from app.jobs._market_context import latest_quotes_by_group, month_spend
from app.models import AICall, IndicatorSummary, JobRun
from app.models import (
AICall,
IndicatorSummary,
IndicatorSummaryTranslation,
JobRun,
User,
)
from app.services.cadence import DEFAULT_POLICY
from app.services.i18n import ACTIVE_LANGUAGES
from app.services.openrouter import (
PROMPT_VERSION,
active_model,
@ -26,11 +33,58 @@ from app.services.openrouter import (
llm_configured,
month_start,
)
from app.services.translation import translate
AGGREGATE_GROUP_NAME = "__all__"
async def translate_summary_for_active_languages(session, summary_id: int) -> None:
    """Fan out per-language translations for one IndicatorSummary row.

    Mirrors ``ai_log_job.translate_log_for_active_languages``: reads the
    distinct non-en ``users.lang`` set, translates the English content
    once per active language in parallel via ``asyncio.gather``, and
    persists each result as an ``IndicatorSummaryTranslation`` row.

    Per-language failures are logged but never raise.

    Args:
        session: async DB session used to look up the active languages and
            the summary row, and to persist and commit the translations.
        summary_id: primary key of the ``IndicatorSummary`` to translate.

    Returns:
        None. Returns early without translating when there is no non-en
        language configured, no user has one of those languages, or the
        summary row does not exist (the last case is logged).
    """
    target_langs = sorted({code for code in ACTIVE_LANGUAGES if code != "en"})
    if not target_langs:
        return

    # Only spend LLM calls on languages at least one user has selected.
    lang_query = select(User.lang).distinct().where(User.lang.in_(target_langs))
    active_langs = (await session.execute(lang_query)).scalars().all()
    if not active_langs:
        return

    summary_row = await session.get(IndicatorSummary, summary_id)
    if summary_row is None:
        log.warning("ind_summary.translate.missing_summary", summary_id=summary_id)
        return

    async with httpx.AsyncClient(follow_redirects=True, timeout=60) as client:
        # return_exceptions=True keeps one failing language from aborting
        # the others; failures come back as values in ``results``.
        results = await asyncio.gather(
            *(translate(client, summary_row.content, lang) for lang in active_langs),
            return_exceptions=True,
        )

    for lang, result in zip(active_langs, results):
        # BaseException, not Exception: a cancelled child surfaces here as
        # asyncio.CancelledError, which is not an Exception subclass and
        # would otherwise reach the tuple unpacking below and raise.
        if isinstance(result, BaseException):
            log.warning("ind_summary.translate.failed",
                        lang=lang, summary_id=summary_id,
                        error=str(result)[:200])
            continue
        translated_md, llm_result = result
        session.add(IndicatorSummaryTranslation(
            summary_id=summary_id,
            lang=lang,
            content_md=translated_md,
            generated_at=utcnow(),
            llm_model=llm_result.model,
            llm_cost_usd=llm_result.cost_usd,
        ))

    await session.commit()
# Strip known meta-commentary openers the model sometimes leaks despite the
# prompt's hard constraints. Each pattern matches one leading sentence.
_LEAK_PATTERNS = [
@ -140,8 +194,10 @@ def clean_summary(text: str) -> str:
async def _generate_one(
session, client: httpx.AsyncClient, group: str, quotes: list[dict],
system_prompt: str, model: str, tone: str, analysis: str,
) -> bool:
"""Generate + persist one group's summary. Returns True on success.
) -> IndicatorSummary | None:
"""Generate + persist one group's summary. Returns the new row on
success (so the caller can fan out localized translations after
the commit picks up its id) or None on failure.
`model` is retained for ledger labelling but call_llm now picks the
active-provider model itself."""
user_prompt = build_summary_user_prompt(group, quotes)
@ -155,7 +211,7 @@ async def _generate_one(
except Exception as e:
session.add(AICall(model=active_model(), status="error", error=str(e)[:500]))
log.warning("ind_summary.failed", group=group, error=str(e)[:120])
return False
return None
cleaned = clean_summary(result.content)
if looks_like_leakage(cleaned) or len(cleaned) < 40:
@ -171,9 +227,9 @@ async def _generate_one(
cost_usd=result.cost_usd,
status="leaked",
))
return False
return None
session.add(IndicatorSummary(
summary = IndicatorSummary(
group_name=group,
generated_at=utcnow(),
model=result.model,
@ -184,7 +240,8 @@ async def _generate_one(
prompt_tokens=result.prompt_tokens,
completion_tokens=result.completion_tokens,
cost_usd=result.cost_usd,
))
)
session.add(summary)
session.add(AICall(
model=result.model,
prompt_tokens=result.prompt_tokens,
@ -192,7 +249,7 @@ async def _generate_one(
cost_usd=result.cost_usd,
status="ok",
))
return True
return summary
async def run() -> None:
@ -249,17 +306,20 @@ async def run() -> None:
for tone in tones:
system_prompt = build_summary_system_prompt(tone, analysis)
for group, quotes in groups.items():
ok = await _generate_one(
summary = await _generate_one(
session, client, group, quotes,
system_prompt, active_model(), tone, analysis,
)
if ok:
if summary is not None:
written += 1
await session.commit() # partial progress survives mid-job error
if summary is not None:
await translate_summary_for_active_languages(session, summary.id)
# One aggregate read across all groups, stored under __all__.
agg_system = build_aggregate_summary_system_prompt(tone, analysis)
agg_user = build_aggregate_summary_user_prompt(groups)
agg_summary: IndicatorSummary | None = None
try:
result = await call_llm(
client,
@ -267,7 +327,7 @@ async def run() -> None:
{"role": "user", "content": agg_user}],
max_tokens=1500, # room for reasoning + 80-word output
)
session.add(IndicatorSummary(
agg_summary = IndicatorSummary(
group_name=AGGREGATE_GROUP_NAME,
generated_at=utcnow(),
model=result.model,
@ -278,7 +338,8 @@ async def run() -> None:
prompt_tokens=result.prompt_tokens,
completion_tokens=result.completion_tokens,
cost_usd=result.cost_usd,
))
)
session.add(agg_summary)
session.add(AICall(
model=result.model,
prompt_tokens=result.prompt_tokens,
@ -294,6 +355,8 @@ async def run() -> None:
log.warning("ind_summary.agg_failed",
tone=tone, error=str(e)[:120])
await session.commit()
if agg_summary is not None:
await translate_summary_for_active_languages(session, agg_summary.id)
jr.items_written = written
log.info("ind_summary.done",