i18n: localize indicator summaries (per-group + aggregate read)

2026-05-27 20:19:47 +02:00 · 2026-05-27 20:19:47 +02:00 · 664757ea8a
commit 664757ea8a
parent 7acd191051
4 changed files with 173 additions and 12 deletions
--- a/app/jobs/indicator_summary_job.py
+++ b/app/jobs/indicator_summary_job.py
@@ -13,8 +13,15 @@ from app.config import get_settings, load_groups
 from app.db import utcnow
 from app.jobs._helpers import job_lifecycle, log
 from app.jobs._market_context import latest_quotes_by_group, month_spend
-from app.models import AICall, IndicatorSummary, JobRun
+from app.models import (
+    AICall,
+    IndicatorSummary,
+    IndicatorSummaryTranslation,
+    JobRun,
+    User,
+)
 from app.services.cadence import DEFAULT_POLICY
+from app.services.i18n import ACTIVE_LANGUAGES
 from app.services.openrouter import (
    PROMPT_VERSION,
    active_model,
@@ -26,11 +33,58 @@ from app.services.openrouter import (
    llm_configured,
    month_start,
 )
+from app.services.translation import translate


 AGGREGATE_GROUP_NAME = "__all__"


+async def translate_summary_for_active_languages(session, summary_id: int) -> None:
+    """Fan out per-language translations for one IndicatorSummary row.
+
+    Mirrors ``ai_log_job.translate_log_for_active_languages``: reads the
+    distinct non-en ``users.lang`` set, translates the English content
+    once per active language in parallel via ``asyncio.gather``, and
+    persists each result as an ``IndicatorSummaryTranslation`` row.
+    Per-language failures (cancellation included) are logged and skipped.
+    """
+    target_langs = sorted({code for code in ACTIVE_LANGUAGES if code != "en"})
+    if not target_langs:
+        return
+    active_langs = (await session.execute(
+        select(User.lang).distinct().where(User.lang.in_(target_langs))
+    )).scalars().all()
+    if not active_langs:
+        return
+
+    summary_row = await session.get(IndicatorSummary, summary_id)
+    if summary_row is None:
+        log.warning("ind_summary.translate.missing_summary", summary_id=summary_id)
+        return
+
+    async with httpx.AsyncClient(follow_redirects=True, timeout=60) as client:
+        results = await asyncio.gather(*[
+            translate(client, summary_row.content, lang)
+            for lang in active_langs
+        ], return_exceptions=True)
+
+    for lang, result in zip(active_langs, results):
+        # gather() may hand back CancelledError, a BaseException (not Exception).
+        if isinstance(result, BaseException):
+            log.warning("ind_summary.translate.failed", lang=lang,
+                        summary_id=summary_id, error=str(result)[:200])
+            continue
+        translated_md, llm_result = result
+        session.add(IndicatorSummaryTranslation(
+            summary_id=summary_id, lang=lang,
+            content_md=translated_md,
+            generated_at=utcnow(),
+            llm_model=llm_result.model,
+            llm_cost_usd=llm_result.cost_usd,
+        ))
+    await session.commit()
+
+
 # Strip known meta-commentary openers the model sometimes leaks despite the
 # prompt's hard constraints. Each pattern matches one leading sentence.
 _LEAK_PATTERNS = [
@@ -140,8 +194,10 @@ def clean_summary(text: str) -> str:
 async def _generate_one(
    session, client: httpx.AsyncClient, group: str, quotes: list[dict],
    system_prompt: str, model: str, tone: str, analysis: str,
-) -> bool:
-    """Generate + persist one group's summary. Returns True on success.
+) -> IndicatorSummary | None:
+    """Generate + persist one group's summary. Returns the new row on
+    success (so the caller can fan out localized translations after
+    the commit picks up its id) or None on failure.
    `model` is retained for ledger labelling but call_llm now picks the
    active-provider model itself."""
    user_prompt = build_summary_user_prompt(group, quotes)
@@ -155,7 +211,7 @@ async def _generate_one(
    except Exception as e:
        session.add(AICall(model=active_model(), status="error", error=str(e)[:500]))
        log.warning("ind_summary.failed", group=group, error=str(e)[:120])
-        return False
+        return None

    cleaned = clean_summary(result.content)
    if looks_like_leakage(cleaned) or len(cleaned) < 40:
@@ -171,9 +227,9 @@ async def _generate_one(
            cost_usd=result.cost_usd,
            status="leaked",
        ))
-        return False
+        return None

-    session.add(IndicatorSummary(
+    summary = IndicatorSummary(
        group_name=group,
        generated_at=utcnow(),
        model=result.model,
@@ -184,7 +240,8 @@ async def _generate_one(
        prompt_tokens=result.prompt_tokens,
        completion_tokens=result.completion_tokens,
        cost_usd=result.cost_usd,
-    ))
+    )
+    session.add(summary)
    session.add(AICall(
        model=result.model,
        prompt_tokens=result.prompt_tokens,
@@ -192,7 +249,7 @@ async def _generate_one(
        cost_usd=result.cost_usd,
        status="ok",
    ))
-    return True
+    return summary


 async def run() -> None:
@@ -249,17 +306,20 @@ async def run() -> None:
            for tone in tones:
                system_prompt = build_summary_system_prompt(tone, analysis)
                for group, quotes in groups.items():
-                    ok = await _generate_one(
+                    summary = await _generate_one(
                        session, client, group, quotes,
                        system_prompt, active_model(), tone, analysis,
                    )
-                    if ok:
+                    if summary is not None:
                        written += 1
                    await session.commit()  # partial progress survives mid-job error
+                    if summary is not None:
+                        await translate_summary_for_active_languages(session, summary.id)

                # One aggregate read across all groups, stored under __all__.
                agg_system = build_aggregate_summary_system_prompt(tone, analysis)
                agg_user = build_aggregate_summary_user_prompt(groups)
+                agg_summary: IndicatorSummary | None = None
                try:
                    result = await call_llm(
                        client,
@@ -267,7 +327,7 @@ async def run() -> None:
                         {"role": "user", "content": agg_user}],
                        max_tokens=1500,  # room for reasoning + 80-word output
                    )
-                    session.add(IndicatorSummary(
+                    agg_summary = IndicatorSummary(
                        group_name=AGGREGATE_GROUP_NAME,
                        generated_at=utcnow(),
                        model=result.model,
@@ -278,7 +338,8 @@ async def run() -> None:
                        prompt_tokens=result.prompt_tokens,
                        completion_tokens=result.completion_tokens,
                        cost_usd=result.cost_usd,
-                    ))
+                    )
+                    session.add(agg_summary)
                    session.add(AICall(
                        model=result.model,
                        prompt_tokens=result.prompt_tokens,
@@ -294,6 +355,8 @@ async def run() -> None:
                    log.warning("ind_summary.agg_failed",
                                tone=tone, error=str(e)[:120])
                await session.commit()
+                if agg_summary is not None:
+                    await translate_summary_for_active_languages(session, agg_summary.id)

        jr.items_written = written
        log.info("ind_summary.done",