digest: translate variants once per active non-en language

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
Giorgio Gilestro 2026-05-27 17:07:18 +02:00
parent d318039ad5
commit 924f37548b
2 changed files with 152 additions and 8 deletions

View file

@ -30,6 +30,7 @@ from app.models import EmailSend, User
from app.routers.email import sign_unsubscribe_token from app.routers.email import sign_unsubscribe_token
from app.services.access import paid_status from app.services.access import paid_status
from app.services.email_service import render_digest_email, send_email from app.services.email_service import render_digest_email, send_email
from app.services.i18n import ACTIVE_LANGUAGES
from app.services.openrouter import ( from app.services.openrouter import (
PROMPT_VERSION, PROMPT_VERSION,
build_daily_digest_prompt, build_daily_digest_prompt,
@ -37,6 +38,7 @@ from app.services.openrouter import (
call_llm, call_llm,
llm_configured, llm_configured,
) )
from app.services.translation import translate
def _now() -> datetime: def _now() -> datetime:
@ -116,6 +118,62 @@ def _kind_for_today(today: datetime) -> str:
return "weekly" if today.weekday() == 6 else "daily" return "weekly" if today.weekday() == 6 else "daily"
async def _translate_variants_for_active_langs(
    client,
    english_variants: dict[str, str],
    target_langs: list[str],
) -> dict[tuple[str, str], str]:
    """Build a ``{(tone, lang): content_md}`` table.

    The English variants are the canonical cells and are always stored
    under ``(tone, "en")``. For each ``(tone, target_lang)`` pair where
    ``target_lang != "en"``, ``translate()`` is awaited concurrently; on
    failure the cell falls back to the English variant of the same tone
    so the digest still goes out, just untranslated.

    Args:
        client: Client object passed through to ``translate()``.
        english_variants: English digest markdown keyed by tone.
        target_langs: Language codes to translate into. ``"en"`` entries
            and repeated codes are ignored.

    Returns:
        A mapping holding one ``(tone, "en")`` cell per tone plus one
        cell for every requested ``(tone, lang)`` pair.
    """
    table: dict[tuple[str, str], str] = {
        (tone, "en"): content for tone, content in english_variants.items()
    }
    # dict.fromkeys() de-duplicates while keeping the caller's order, so a
    # repeated language code never triggers a second translate() call.
    langs = [lang for lang in dict.fromkeys(target_langs) if lang != "en"]
    pairs = [(tone, lang) for tone in english_variants for lang in langs]
    if not pairs:
        return table

    results = await asyncio.gather(
        *(
            translate(client, english_variants[tone], lang)
            for tone, lang in pairs
        ),
        return_exceptions=True,
    )
    for (tone, lang), result in zip(pairs, results):
        # gather(return_exceptions=True) may also hand back
        # asyncio.CancelledError, which derives from BaseException rather
        # than Exception. Checking only Exception would let it reach the
        # tuple unpacking below and abort the job with a TypeError instead
        # of falling back to English for that one cell.
        if isinstance(result, BaseException):
            log.warning("digest.translate.failed",
                        tone=tone, lang=lang,
                        error=(str(result) or repr(result))[:200])
            table[(tone, lang)] = english_variants[tone]
            continue
        translated_md, _llm_log = result
        table[(tone, lang)] = translated_md
    return table
def _pick_variant(
    table: dict[tuple[str, str], str], tone: str, lang: str,
) -> str:
    """Return the digest content for a recipient.

    Lookup order: exact ``(tone, lang)`` → ``(tone, "en")`` →
    ``("INTERMEDIATE", "en")`` → first table value. The last two
    fallbacks are defensive; the table is expected to contain at least
    one English entry whenever the job is sending.

    Args:
        table: Digest content keyed by ``(tone, lang)``.
        tone: The recipient's digest tone.
        lang: The recipient's language code.

    Returns:
        The content of the first candidate key found in ``table``.

    Raises:
        LookupError: If ``table`` is empty, so there is nothing to send.
    """
    for key in ((tone, lang), (tone, "en"), ("INTERMEDIATE", "en")):
        if key in table:
            return table[key]
    # Last resort: whatever variant exists, in insertion order.
    for content in table.values():
        return content
    # An explicit error instead of the bare StopIteration that
    # next(iter(...)) would leak out of a non-generator function.
    raise LookupError("digest variant table is empty")
async def _send_one(user: User, kind: str, content_html: str, date_str: str, async def _send_one(user: User, kind: str, content_html: str, date_str: str,
session) -> None: session) -> None:
settings_url = f"{branding.SITE_URL}/settings" settings_url = f"{branding.SITE_URL}/settings"
@ -200,17 +258,21 @@ async def run() -> None:
jr.error = "all variants failed" jr.error = "all variants failed"
return return
# Build the per-language translation table once per job run.
active_non_en = sorted({l for l in ACTIVE_LANGUAGES if l != "en"})
async with httpx.AsyncClient(follow_redirects=True) as client:
variant_table = await _translate_variants_for_active_langs(
client, variants, active_non_en,
)
written = 0 written = 0
for u in fresh: for u in fresh:
tone = (u.digest_tone or "INTERMEDIATE").upper() tone = (u.digest_tone or "INTERMEDIATE").upper()
# Fall back to INTERMEDIATE first (the more common tone) and then content = _pick_variant(
# to whatever variant succeeded, so an asymmetric LLM failure variant_table,
# doesn't silently skip the user. tone=tone,
content = (variants.get(tone) lang=(u.lang or "en"),
or variants.get("INTERMEDIATE") )
or next(iter(variants.values()), None))
if content is None:
continue
await _send_one(u, kind, content, date_str, session) await _send_one(u, kind, content, date_str, session)
await asyncio.sleep(0.1) await asyncio.sleep(0.1)
written += 1 written += 1

View file

@ -250,3 +250,85 @@ async def test_analyse_no_clause_when_lang_is_en(tmp_path, monkeypatch):
await pa.analyse(session, req) await pa.analyse(session, req)
system = next(m["content"] for m in captured["messages"] if m["role"] == "system") system = next(m["content"] for m in captured["messages"] if m["role"] == "system")
assert "Respond in" not in system assert "Respond in" not in system
@pytest.mark.asyncio
async def test_digest_translates_variants_per_active_lang(monkeypatch):
    """Each English variant is translated once per active non-en language.

    The helper returns the in-memory mapping that the send loop consults,
    holding both the untouched English cells and the translated ones.
    """
    from unittest.mock import MagicMock

    from app.jobs import email_digest_job as ed
    from app.services.openrouter import LogResult

    english_variants = {
        "NOVICE": "**Today.** Markets calmer.",
        "INTERMEDIATE": "**Today.** Indices slightly down.",
        "PRO": "**Today.** Risk-off rotation, breadth weak.",
    }
    recorded: list[tuple[str, str]] = []

    async def _fake_translate(client, text, target_lang):
        # Remember every request so the call count can be checked below.
        recorded.append((text, target_lang))
        rendered = f"[IT] {text}"
        llm_log = LogResult(
            content=rendered,
            model="m",
            prompt_tokens=10,
            completion_tokens=10,
            cost_usd=0.0,
        )
        return rendered, llm_log

    monkeypatch.setattr(ed, "translate", _fake_translate)

    table = await ed._translate_variants_for_active_langs(
        MagicMock(), english_variants, ["it"],
    )

    # Three tones × one non-en lang = three translation calls.
    assert len(recorded) == 3
    requested_langs = {lang for _, lang in recorded}
    assert requested_langs == {"it"}

    # English entries are present unchanged.
    for tone in ("NOVICE", "PRO"):
        assert table[(tone, "en")] == english_variants[tone]

    # Italian entries are populated.
    assert table[("INTERMEDIATE", "it")].startswith("[IT] ")
@pytest.mark.asyncio
async def test_digest_translation_failure_falls_back_to_english(monkeypatch):
    """A failed translate() call leaves the English text in that cell.

    The (tone, lang) entry then holds the English variant of the same
    tone, so the user still gets a digest, just in English that day.
    """
    from unittest.mock import MagicMock

    from app.jobs import email_digest_job as ed

    english_variants = {"INTERMEDIATE": "**Today.** Indices down."}

    async def _fake_translate(client, text, target_lang):
        raise RuntimeError("upstream down")

    monkeypatch.setattr(ed, "translate", _fake_translate)

    table = await ed._translate_variants_for_active_langs(
        MagicMock(), english_variants, ["it"],
    )

    italian_cell = table[("INTERMEDIATE", "it")]
    assert italian_cell == english_variants["INTERMEDIATE"]
def test_digest_pick_variant_uses_user_lang():
    """The variant picker resolves content from a tone and a language."""
    from app.jobs import email_digest_job as ed

    table = {
        (tone, lang): f"{tone.lower()} {lang}"
        for tone in ("NOVICE", "INTERMEDIATE")
        for lang in ("en", "it")
    }
    cases = [
        # Exact (tone, lang) hits.
        ("NOVICE", "it", "novice it"),
        ("INTERMEDIATE", "en", "intermediate en"),
        # Missing lang → fallback to English variant of the same tone.
        ("NOVICE", "de", "novice en"),
        # Missing tone → fallback to INTERMEDIATE/en (the safe default).
        ("UNKNOWN", "en", "intermediate en"),
    ]
    for tone, lang, expected in cases:
        assert ed._pick_variant(table, tone=tone, lang=lang) == expected