From 924f37548b7c676b6c169029ebf81cfaa53b7d27 Mon Sep 17 00:00:00 2001
From: Giorgio Gilestro
Date: Wed, 27 May 2026 17:07:18 +0200
Subject: [PATCH] digest: translate variants once per active non-en language

Co-Authored-By: Claude Opus 4.7
---
 app/jobs/email_digest_job.py           | 78 +++++++++++++++++++++---
 tests/test_localization_integration.py | 82 ++++++++++++++++++++++++++
 2 files changed, 152 insertions(+), 8 deletions(-)

diff --git a/app/jobs/email_digest_job.py b/app/jobs/email_digest_job.py
index 1f38777..5ff25c6 100644
--- a/app/jobs/email_digest_job.py
+++ b/app/jobs/email_digest_job.py
@@ -30,6 +30,7 @@ from app.models import EmailSend, User
 from app.routers.email import sign_unsubscribe_token
 from app.services.access import paid_status
 from app.services.email_service import render_digest_email, send_email
+from app.services.i18n import ACTIVE_LANGUAGES
 from app.services.openrouter import (
     PROMPT_VERSION,
     build_daily_digest_prompt,
@@ -37,6 +38,7 @@ from app.services.openrouter import (
     call_llm,
     llm_configured,
 )
+from app.services.translation import translate
 
 
 def _now() -> datetime:
@@ -116,6 +118,62 @@ def _kind_for_today(today: datetime) -> str:
     return "weekly" if today.weekday() == 6 else "daily"
 
 
+async def _translate_variants_for_active_langs(
+    client,
+    english_variants: dict[str, str],
+    target_langs: list[str],
+) -> dict[tuple[str, str], str]:
+    """Build a {(tone, lang): content_md} table.
+
+    Starts with the English variants as the canonical cells. For each
+    (tone, target_lang) pair where target_lang != 'en', calls translate()
+    in parallel; on failure (or an empty result) the cell falls back to the
+    English variant of the same tone so the digest still goes out.
+    """
+    table: dict[tuple[str, str], str] = {
+        (tone, "en"): content for tone, content in english_variants.items()
+    }
+    pairs = [
+        (tone, lang)
+        for tone in english_variants
+        for lang in target_langs
+        if lang != "en"
+    ]
+    if not pairs:
+        return table
+
+    results = await asyncio.gather(*[
+        translate(client, english_variants[tone], lang) for tone, lang in pairs
+    ], return_exceptions=True)
+    for (tone, lang), result in zip(pairs, results):
+        if isinstance(result, BaseException):  # incl. CancelledError
+            log.warning("digest.translate.failed",
+                        tone=tone, lang=lang, error=str(result)[:200])
+            table[(tone, lang)] = english_variants[tone]
+            continue
+        translated_md, _llm_log = result
+        table[(tone, lang)] = translated_md or english_variants[tone]
+    return table
+
+
+def _pick_variant(
+    table: dict[tuple[str, str], str], tone: str, lang: str,
+) -> str:
+    """Return the digest content for a recipient.
+
+    Lookup order: exact (tone, lang) → (tone, 'en') → ('INTERMEDIATE',
+    'en') → first table value. The last two steps are defensive; the table
+    always contains at least one English entry when the job is sending.
+    """
+    if (tone, lang) in table:
+        return table[(tone, lang)]
+    if (tone, "en") in table:
+        return table[(tone, "en")]
+    if ("INTERMEDIATE", "en") in table:
+        return table[("INTERMEDIATE", "en")]
+    return next(iter(table.values()))
+
+
 async def _send_one(user: User, kind: str, content_html: str,
                     date_str: str, session) -> None:
     settings_url = f"{branding.SITE_URL}/settings"
@@ -200,17 +258,21 @@ async def run() -> None:
             jr.error = "all variants failed"
             return
 
+        # Build the per-language translation table once per job run.
+        active_non_en = sorted(set(ACTIVE_LANGUAGES) - {"en"})
+        async with httpx.AsyncClient(follow_redirects=True) as client:
+            variant_table = await _translate_variants_for_active_langs(
+                client, variants, active_non_en,
+            )
+
         written = 0
         for u in fresh:
             tone = (u.digest_tone or "INTERMEDIATE").upper()
-            # Fall back to INTERMEDIATE first (the more common tone) and then
-            # to whatever variant succeeded, so an asymmetric LLM failure
-            # doesn't silently skip the user.
-            content = (variants.get(tone)
-                       or variants.get("INTERMEDIATE")
-                       or next(iter(variants.values()), None))
-            if content is None:
-                continue
+            content = _pick_variant(
+                variant_table,
+                tone=tone,
+                lang=(u.lang or "en"),
+            )
             await _send_one(u, kind, content, date_str, session)
             await asyncio.sleep(0.1)
             written += 1
diff --git a/tests/test_localization_integration.py b/tests/test_localization_integration.py
index 680fe25..ebe8a21 100644
--- a/tests/test_localization_integration.py
+++ b/tests/test_localization_integration.py
@@ -250,3 +250,85 @@ async def test_analyse_no_clause_when_lang_is_en(tmp_path, monkeypatch):
         await pa.analyse(session, req)
     system = next(m["content"] for m in captured["messages"] if m["role"] == "system")
     assert "Respond in" not in system
+
+
+@pytest.mark.asyncio
+async def test_digest_translates_variants_per_active_lang(monkeypatch):
+    """After English variants are built, the job translates each to every
+    active non-en lang. The result is an in-memory mapping the send loop
+    consults."""
+    from unittest.mock import MagicMock
+    from app.jobs import email_digest_job as ed
+    from app.services.openrouter import LogResult
+
+    english_variants = {
+        "NOVICE": "**Today.** Markets calmer.",
+        "INTERMEDIATE": "**Today.** Indices slightly down.",
+        "PRO": "**Today.** Risk-off rotation, breadth weak.",
+    }
+
+    translate_calls: list[tuple[str, str]] = []
+
+    async def _fake_translate(client, text, target_lang):
+        translate_calls.append((text, target_lang))
+        return f"[IT] {text}", LogResult(
+            content=f"[IT] {text}", model="m",
+            prompt_tokens=10, completion_tokens=10, cost_usd=0.0,
+        )
+
+    monkeypatch.setattr(ed, "translate", _fake_translate)
+
+    client = MagicMock()
+    table = await ed._translate_variants_for_active_langs(
+        client, english_variants, ["it"],
+    )
+
+    # Three tones × one non-en lang = three translation calls.
+    assert len(translate_calls) == 3
+    assert {lang for _, lang in translate_calls} == {"it"}
+
+    # English entries are present unchanged.
+    assert table[("NOVICE", "en")] == english_variants["NOVICE"]
+    assert table[("PRO", "en")] == english_variants["PRO"]
+    # Italian entries are populated.
+    assert table[("INTERMEDIATE", "it")].startswith("[IT] ")
+
+
+@pytest.mark.asyncio
+async def test_digest_translation_failure_falls_back_to_english(monkeypatch):
+    """When translate() fails for a (tone, lang) cell, the table entry
+    for that cell is the English variant of the same tone — the user
+    still gets a digest, just in English that day."""
+    from unittest.mock import MagicMock
+    from app.jobs import email_digest_job as ed
+
+    english_variants = {"INTERMEDIATE": "**Today.** Indices down."}
+
+    async def _fake_translate(client, text, target_lang):
+        raise RuntimeError("upstream down")
+    monkeypatch.setattr(ed, "translate", _fake_translate)
+
+    client = MagicMock()
+    table = await ed._translate_variants_for_active_langs(
+        client, english_variants, ["it"],
+    )
+
+    assert table[("INTERMEDIATE", "it")] == english_variants["INTERMEDIATE"]
+
+
+def test_digest_pick_variant_uses_user_lang():
+    """The variant-picker helper consults user.digest_tone + user.lang."""
+    from app.jobs import email_digest_job as ed
+
+    table = {
+        ("NOVICE", "en"): "novice en",
+        ("NOVICE", "it"): "novice it",
+        ("INTERMEDIATE", "en"): "intermediate en",
+        ("INTERMEDIATE", "it"): "intermediate it",
+    }
+    assert ed._pick_variant(table, tone="NOVICE", lang="it") == "novice it"
+    assert ed._pick_variant(table, tone="INTERMEDIATE", lang="en") == "intermediate en"
+    # Missing lang → fallback to English variant of the same tone.
+    assert ed._pick_variant(table, tone="NOVICE", lang="de") == "novice en"
+    # Missing tone → fallback to INTERMEDIATE/en (the safe default).
+    assert ed._pick_variant(table, tone="UNKNOWN", lang="en") == "intermediate en"