From 2ecf250d539e0c840e15461409771df1959b1b21 Mon Sep 17 00:00:00 2001 From: Giorgio Gilestro Date: Wed, 27 May 2026 16:22:41 +0200 Subject: [PATCH] localization: digest is shared, not per-user (corrected design) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The user pointed out that the only genuinely per-user AI surface is portfolio analysis. The strategic log AND the email digest are both shared cycles — generated once per cycle, consumed by many users. For the digest, this means: - _generate_variants still produces one English variant per tone (as today, unchanged) - A new helper translates each variant once per active non-en lang in parallel via asyncio.gather, producing a {(tone, lang): content} table for the duration of the job run - The per-user send loop selects (user.digest_tone, user.lang), falling back to the English variant of the same tone on miss Translation count per run = tones × non-en active langs = 3 today. 100 Italian users no longer mean 100 translation calls. 
Co-Authored-By: Claude Opus 4.7 --- .../plans/2026-05-27-localization-italian.md | 269 +++++++++++++----- .../2026-05-27-localization-italian-design.md | 121 +++++--- 2 files changed, 279 insertions(+), 111 deletions(-) diff --git a/docs/superpowers/plans/2026-05-27-localization-italian.md b/docs/superpowers/plans/2026-05-27-localization-italian.md index 363689b..eaef3e2 100644 --- a/docs/superpowers/plans/2026-05-27-localization-italian.md +++ b/docs/superpowers/plans/2026-05-27-localization-italian.md @@ -1020,111 +1020,250 @@ git commit -m "analyse: thread user.lang into the system prompt" --- -### Task 7: email_digest_job localization +### Task 7: email_digest_job — translate variants once, route by (tone, lang) **Files:** - Modify: `app/jobs/email_digest_job.py` - Test: `tests/test_localization_integration.py` (append) -- [ ] **Step 1: Write failing test** +**Design recap:** The digest job already produces one English variant per +tone (NOVICE / INTERMEDIATE / PRO) once per job run. After those English +variants are built, the job translates each one to every active +non-English language in parallel and builds an in-memory lookup +`{(tone, lang): content_md}`. The per-user send step picks the cell +matching `(user.digest_tone, user.lang)`, falling back to `(tone, 'en')` +when a translation is missing or failed. No per-user LLM call. + +- [ ] **Step 1: Inspect the existing digest flow** + +```bash +grep -n "_generate_variants\|_send_one\|active_users\|for .* in .*users" app/jobs/email_digest_job.py | head -20 +``` + +Identify: +1. Where the English variants are built (one call per tone). +2. The shape of the returned object (likely `dict[str, str]` keyed by tone like `"NOVICE"`). +3. The per-user send loop and where it picks a variant for the recipient. 
+ +- [ ] **Step 2: Write a failing test** Append to `tests/test_localization_integration.py`: ```python @pytest.mark.asyncio -async def test_digest_threads_lang_into_system_prompt(monkeypatch): - """The per-user digest generation appends 'Respond in Italian.' to - the system prompt when the user is Italian.""" - from unittest.mock import AsyncMock +async def test_digest_translates_variants_per_active_lang(monkeypatch): + """After English variants are built, the job translates each to every + active non-en lang. The result is an in-memory mapping the send loop + consults.""" + from unittest.mock import AsyncMock, MagicMock from app.jobs import email_digest_job as ed from app.services.openrouter import LogResult - captured = [] - - async def _fake_call_llm(client, messages, **kw): - captured.append(messages) - return LogResult( - content="**Apertura.** Il mercato è in calo.", - model="m", prompt_tokens=300, completion_tokens=400, cost_usd=0.0001, - ) - monkeypatch.setattr(ed, "call_llm", _fake_call_llm) - - # _generate_variants is the helper that runs one LLM call per tone. - # It takes a context dict and a 'kind' (daily/weekly). The exact - # signature is in app/jobs/email_digest_job.py — inspect before - # calling. The test below assumes it accepts a `target_lang` kwarg. - from datetime import datetime, timezone - - ctx = { - "today": datetime.now(timezone.utc), - "quotes_by_group": {}, - "headlines_by_bucket": {}, - "reference_line": None, + # Stub the English variant builder so we control the input set. + english_variants = { + "NOVICE": "**Today.** Markets calmer.", + "INTERMEDIATE": "**Today.** Indices slightly down.", + "PRO": "**Today.** Risk-off rotation, breadth weak.", } - # `_generate_variants` should iterate tones internally; we just need - # to assert at least one captured system prompt has the IT clause. 
- import httpx - async with httpx.AsyncClient() as client: - await ed._generate_variants(None, client, "daily", ctx, target_lang="it") + # Track every translate() call so we can assert fan-out shape. + translate_calls: list[tuple[str, str]] = [] - assert captured, "no LLM call was made" - italian_found = any( - any( - m["role"] == "system" and "Respond in Italian" in m["content"] - for m in messages + async def _fake_translate(client, text, target_lang): + translate_calls.append((text, target_lang)) + return f"[IT] {text}", LogResult( + content=f"[IT] {text}", model="m", + prompt_tokens=10, completion_tokens=10, cost_usd=0.0, ) - for messages in captured + + monkeypatch.setattr(ed, "translate", _fake_translate) + + # The helper under test takes the English variants dict + a list of + # active non-en languages, returns the {(tone, lang): content} table. + client = MagicMock() + table = await ed._translate_variants_for_active_langs( + client, english_variants, ["it"], ) - assert italian_found, "no system prompt contained 'Respond in Italian'" + + # Three tones × one non-en lang = three translation calls. + assert len(translate_calls) == 3 + assert {lang for _, lang in translate_calls} == {"it"} + + # English entries are present unchanged. + assert table[("NOVICE", "en")] == english_variants["NOVICE"] + assert table[("PRO", "en")] == english_variants["PRO"] + # Italian entries are populated. 
+ assert table[("INTERMEDIATE", "it")].startswith("[IT] ") + + +@pytest.mark.asyncio +async def test_digest_translation_failure_falls_back_to_english(monkeypatch): + """When translate() fails for a (tone, lang) cell, the table entry + for that cell is the English variant of the same tone — the user + still gets a digest, just in English that day.""" + from app.jobs import email_digest_job as ed + + english_variants = {"INTERMEDIATE": "**Today.** Indices down."} + + async def _fake_translate(client, text, target_lang): + raise RuntimeError("upstream down") + monkeypatch.setattr(ed, "translate", _fake_translate) + + from unittest.mock import MagicMock + client = MagicMock() + table = await ed._translate_variants_for_active_langs( + client, english_variants, ["it"], + ) + + assert table[("INTERMEDIATE", "it")] == english_variants["INTERMEDIATE"] + + +def test_digest_pick_variant_uses_user_lang(): + """The variant-picker helper consults user.digest_tone + user.lang.""" + from app.jobs import email_digest_job as ed + + table = { + ("NOVICE", "en"): "novice en", + ("NOVICE", "it"): "novice it", + ("INTERMEDIATE", "en"): "intermediate en", + ("INTERMEDIATE", "it"): "intermediate it", + } + assert ed._pick_variant(table, tone="NOVICE", lang="it") == "novice it" + assert ed._pick_variant(table, tone="INTERMEDIATE", lang="en") == "intermediate en" + # Missing lang → fallback to English variant of the same tone. + assert ed._pick_variant(table, tone="NOVICE", lang="de") == "novice en" + # Missing tone → fallback to INTERMEDIATE/en (the safe default). 
+ assert ed._pick_variant(table, tone="UNKNOWN", lang="en") == "intermediate en" ``` -- [ ] **Step 2: Run test to verify it fails** +- [ ] **Step 3: Run tests to verify they fail** ```bash -docker compose -f docker-compose.test.yml run --rm test pytest tests/test_localization_integration.py -k digest_threads_lang -v +docker compose -f docker-compose.test.yml run --rm test pytest tests/test_localization_integration.py -k "digest_translates or digest_translation_failure or digest_pick" -v ``` -Expected: FAIL — either `_generate_variants` doesn't accept `target_lang`, or the IT clause isn't in the prompt. +Expected: 3 FAIL with `AttributeError` for `_translate_variants_for_active_langs` and `_pick_variant`. -- [ ] **Step 3: Thread `target_lang` through `_generate_variants` and the per-user driver** +- [ ] **Step 4: Implement the two helpers + wire them into the job** -In `app/jobs/email_digest_job.py`: +In `app/jobs/email_digest_job.py`, add the necessary imports at the top (skip any that are already present): -1. Import the helper: - ```python - from app.services.i18n import respond_in_clause - ``` +```python +import asyncio -2. Find `_generate_variants`. Add `target_lang: str = "en"` to its signature. Where it composes each variant's system prompt, append: - ```python - system_prompt = system_prompt + respond_in_clause(target_lang) - ``` +from app.services.i18n import ACTIVE_LANGUAGES +from app.services.translation import translate +``` -3. Find the per-user send path (the function that actually iterates users — likely `_send_for_user` or similar, called from the job's main loop). Where it calls `_generate_variants`, pass `target_lang=user.lang`: - ```python - variants = await _generate_variants( - session, client, kind, ctx, target_lang=user.lang, - ) - ``` +Add the two helpers as module-level functions: - If the existing call site is in the main job loop and constructs `variants` once for all users, that breaks the "per-user language" contract. 
In that case the variants must be generated PER USER, not globally. Look for the caller; if it caches `variants` across users, restructure to call `_generate_variants` inside the per-user loop. **Important:** if this requires more than a few lines of change, stop and report a concern — the existing assumption may be wrong and we want explicit guidance. +```python +async def _translate_variants_for_active_langs( + client, + english_variants: dict[str, str], + target_langs: list[str], +) -> dict[tuple[str, str], str]: + """Build a {(tone, lang): content_md} table. -- [ ] **Step 4: Run tests to verify they pass** + Starts with the English variants as the canonical cells. For each + (tone, target_lang) pair where target_lang != 'en', calls translate() + in parallel; on failure the cell falls back to the English variant + of the same tone so the digest still goes out, just untranslated. + """ + table: dict[tuple[str, str], str] = { + (tone, "en"): content for tone, content in english_variants.items() + } + pairs = [ + (tone, lang) + for tone in english_variants + for lang in target_langs + if lang != "en" + ] + if not pairs: + return table + + results = await asyncio.gather(*[ + translate(client, english_variants[tone], lang) for tone, lang in pairs + ], return_exceptions=True) + for (tone, lang), result in zip(pairs, results): + if isinstance(result, Exception): + log.warning("digest.translate.failed", + tone=tone, lang=lang, error=str(result)[:200]) + table[(tone, lang)] = english_variants[tone] + continue + translated_md, _llm_log = result + table[(tone, lang)] = translated_md + return table + + +def _pick_variant( + table: dict[tuple[str, str], str], tone: str, lang: str, +) -> str: + """Return the digest content for a recipient. + + Lookup order: exact (tone, lang) → (tone, 'en') → ('INTERMEDIATE', + 'en') → first table value. 
The last two fallbacks are defensive; the table + always contains at least one English entry when the job is sending.""" + if (tone, lang) in table: + return table[(tone, lang)] + if (tone, "en") in table: + return table[(tone, "en")] + if ("INTERMEDIATE", "en") in table: + return table[("INTERMEDIATE", "en")] + return next(iter(table.values())) +``` + +Now find the place in the job loop where English variants are generated +(after `_generate_variants` returns its tone-keyed dict) and before the +per-user send loop. Insert: + +```python +# Build the per-language translation table once per job run. Target +# languages are every entry in ACTIVE_LANGUAGES except 'en'; this does +# not yet check whether any user actually has that language set. +active_non_en = sorted({l for l in ACTIVE_LANGUAGES if l != "en"}) +# Optional further filter: only languages with at least one user. +# (See task notes — defer if optimization isn't worth it yet.) +variant_table = await _translate_variants_for_active_langs( + client, variants, active_non_en, +) +``` + +And in the per-user send step, replace the direct variant lookup +(e.g. `content = variants[user.digest_tone]`) with: + +```python +content = _pick_variant( + variant_table, + tone=(user.digest_tone or "INTERMEDIATE").upper(), + lang=(user.lang or "en"), +) +``` + +- [ ] **Step 5: Run tests to verify they pass** ```bash docker compose -f docker-compose.test.yml run --rm test pytest tests/test_localization_integration.py -v ``` -Expected: all tests pass (8 total now). +Expected: all tests pass (≥10 total now). -- [ ] **Step 5: Commit** +- [ ] **Step 6: Commit** ```bash git add app/jobs/email_digest_job.py tests/test_localization_integration.py -git commit -m "digest: thread user.lang into per-user generation" +git commit -m "digest: translate variants once per active non-en language" ``` +## Context + +- Translation count per job run is `tones × non-en active languages`. + Today that's `3 × 1 = 3` translation calls per digest run. Negligible cost. 
+- A failed translation degrades gracefully — the cell falls back to the + English variant of the same tone. The recipient receives a digest in + English instead of getting no email at all. This matches the spec's + "translation is best-effort" intent. + --- ### Task 8: /log endpoint localized fetch @@ -1569,7 +1708,7 @@ Expected: digest email lands in Italian, including the subject line. - **`translate()` helper** → Task 2 (no-op fast path for `en`/unknown; code-fence stripping; raises on provider failure) - **`ai_log_job` translation fan-out** → Task 5 (parallel via `asyncio.gather`; per-language failure isolated) - **Portfolio analysis `lang`-aware system prompt** → Task 6 -- **Email digest `lang`-aware per-user generation** → Task 7 +- **Email digest: shared variant generation, post-translation, (tone, lang) routing** → Task 7 - **`/log` localized fetch with English fallback** → Task 8 - **`PATCH /api/settings/language` with ACTIVE_LANGUAGES gate** → Task 9 - **Settings dropdown with IT active + ES/FR/DE disabled** → Task 10 @@ -1586,4 +1725,4 @@ Expected: digest email lands in Italian, including the subject line. - `_resolve_log_content(session, log_id, lang) -> str` — used in Task 8 only. - `translate_log_for_active_languages(session, log_id) -> None` — used in Task 5 only. -**Note on Task 7:** if `_generate_variants` is currently called ONCE for all users in the digest job (variants shared), the localization plan requires it to be called per-user. The plan flags this and asks the engineer to surface a concern rather than silently restructuring. If the structure differs from expectation, the engineer should escalate before proceeding. +**Note on Task 7:** the digest job is treated as shared content. `_generate_variants` keeps its existing per-tone behaviour unchanged; localization is layered on top via two new module-level helpers (`_translate_variants_for_active_langs`, `_pick_variant`) and a routing change in the per-user send loop. 
No restructuring of the existing tone-generation path is needed. Translation count per run is `tones × non-en active langs` (today: 3 calls/run) — negligible. diff --git a/docs/superpowers/specs/2026-05-27-localization-italian-design.md b/docs/superpowers/specs/2026-05-27-localization-italian-design.md index 1c03633..c704494 100644 --- a/docs/superpowers/specs/2026-05-27-localization-italian-design.md +++ b/docs/superpowers/specs/2026-05-27-localization-italian-design.md @@ -6,10 +6,10 @@ ## Context All AI-generated content (strategic log, daily email digest, portfolio -analysis, follow-up chat) is English-only today. The operator wants to -add Italian translation as the first localization, with Spanish, -French, and German listed as "coming soon" in the settings UI but not -yet functional. Italian must work end-to-end from settings dropdown to +analysis) is English-only today. The operator wants to add Italian +translation as the first localization, with Spanish, French, and +German listed as "coming soon" in the settings UI but not yet +functional. Italian must work end-to-end from settings dropdown to rendered output; the other three exist as commitments and design placeholders so adding them later is a flag flip. @@ -49,30 +49,44 @@ retrofit. The system has two categories of AI-generated content, with different generation patterns: -### Per-user content (analyse, digest, chat) +### Per-user content (portfolio analysis only) -Each call already produces output for exactly one user. The fix is -trivial: the user's `lang` threads into the prompt assembly, and the -system prompt gains a `"Respond in Italian."` clause when `lang != 'en'`. -One LLM call, no extra cost, no extra latency. +Portfolio analysis is the only AI-generated surface whose *content* is +genuinely per-user — each call's input is the user's own pie. Here we +add the `"Respond in Italian."` clause to the system prompt when +`user.lang != 'en'`. One LLM call, no extra cost, no extra latency. 
-### Shared content (strategic log) +### Shared content (strategic log, email digest) -The hourly `ai_log_job` writes a single English log row used by every -user. To serve non-English users, we generate the English log as today, -then translate it to each active non-English language via a separate -LLM call and store the result in a new `strategic_log_translations` -table. Translations are fanned out in parallel with `asyncio.gather` so -total translation time is max(single call), not sum. The `/log` -endpoint serves the translation matching the requester's `lang`, -falling back to English if none exists. +Strategic log and email digest are generated once per cycle (hourly, +daily) and consumed by many users. We do NOT generate them per-user +per-language. Instead: -Why translate-after rather than generate-N-times: the strategic log -includes live market data, headlines, and references that are -expensive to assemble. Re-running the full generation in each language -duplicates that work; translating the rendered output preserves a -single source of truth (the English original) and only spends LLM -tokens on the actual prose conversion. +- **Strategic log**: `ai_log_job` writes the English row as today, + then translates it to each active non-English language and persists + in `strategic_log_translations` (one row per `(log_id, lang)`). + `/log` serves the translation matching the user's `lang`, falling + back to English. + +- **Email digest**: the digest job already generates one English + variant per tone (NOVICE / INTERMEDIATE / PRO). We extend the same + cycle so that for each tone variant, the job ALSO produces a + translation for each active non-English language. The translations + live alongside the English variants in memory for the duration of + the job run; the per-user send step selects the matching + `(tone, lang)` cell. No new persistence — variants exist only for + the lifetime of the job. 
+ +Why translate-after rather than generate-N-times: the shared content +involves expensive context assembly (live market data, headlines, log +history). Re-running the full generation in each language duplicates +that work; translating the rendered output preserves a single source +of truth and only spends LLM tokens on the actual prose conversion. + +Why no per-user LLM call for the digest: 100 Italian users would +otherwise mean 100 translation calls per day. With the shared cycle +we make 3 translations per day (one per tone) regardless of how many +Italian users receive that variant. ## Architecture @@ -82,18 +96,27 @@ tokens on the actual prose conversion. │ Values: 'en' (default) | 'it' (active) | 'es'/'fr'/'de' (WIP) │ └─────────────────────────────────────────────────────────────────┘ │ - ├─ Per-user surfaces (portfolio analyse, daily digest, chat) + ├─ Per-user surface (portfolio analysis only) │ └─ prompt assembly threads user.lang to │ respond_in_clause() → appended to system prompt │ when lang != 'en'. Single call_llm, no extra cost. 
│ - └─ Shared surfaces (strategic log) - ├─ ai_log_job writes the English row as today - ├─ Then SELECTs distinct users.lang where lang != 'en' - │ AND user has active paid access - ├─ asyncio.gather of one translate() call per language - └─ Each result → INSERT into strategic_log_translations - keyed by (log_id, lang) UNIQUE + ├─ Shared surface — strategic log + │ ├─ ai_log_job writes the English row as today + │ ├─ SELECTs distinct users.lang where lang != 'en' + │ │ (no tier gating) + │ ├─ asyncio.gather of one translate() call per language + │ └─ Each result → INSERT into strategic_log_translations + │ keyed by (log_id, lang) UNIQUE + │ + └─ Shared surface — email digest + ├─ Job builds one English variant per tone (existing + │ _generate_variants behaviour, unchanged) + ├─ For each (variant, active non-en lang), translate + │ via asyncio.gather; results live in memory + └─ Per-user send loop looks up (user.digest_tone, + user.lang) in the in-memory dictionary; falls back + to the English variant of the same tone on miss ``` ## Data model @@ -233,12 +256,18 @@ read time. ### `app/jobs/email_digest_job.py` (modified) -The digest is already per-user and assembles its own prompt. Thread -`user.lang` through: +The job already builds one English variant per tone in +`_generate_variants(...)`. After that returns, the job translates each +variant into every active non-English language (parallel via +`asyncio.gather`), and exposes a `(tone, lang) -> content` lookup that +`_send_one(...)` consults using the recipient's `user.lang`. -- `_generate_variants(...)` accepts a `target_lang` param -- The system prompt assembly appends `respond_in_clause(target_lang)` -- Subject-line generation runs in the same call, so it's localized too +- Variants live only in memory for the duration of the job run. +- A failed translation for `(tone, lang)` is logged and that cell + falls back to the English variant of the same tone. 
The send + proceeds — the user still gets a digest, just in English that day. +- The subject line is part of each variant's content, so it gets + translated as part of the same call. ### `app/services/portfolio_analysis.py` (modified) @@ -364,15 +393,15 @@ End-to-end manual check after deploy: - We do not translate UI labels. Italian users see English buttons, headings, and tooltips. Future scope. -- We do not translate user-generated content (chat questions the user - types). Only the AI's output is localized; user-supplied input flows - through unchanged. -- We do not translate the email subject line independently. The same - per-user LLM call that generates the digest body also generates the - subject in the target language. -- We do not surface translation cost in any user-visible UI. Cost is - recorded in `strategic_log_translations.llm_cost_usd` and the existing - `ai_calls` ledger picks up per-user calls as today. +- We do not translate user-supplied input (e.g. portfolio names, any + free-text fields). Only AI-generated output is localized. +- The email subject line is part of each variant's content, so it + gets translated alongside the body in the same `translate()` call + per (tone, lang) cell — no separate subject-translation path. +- We do not surface translation cost in any user-visible UI. Strategic + log translation cost lands in `strategic_log_translations.llm_cost_usd`; + digest translation cost is captured in the existing `ai_calls` ledger + via the underlying `call_llm` calls. - We do **not** gate strategic-log translation on user tier. Any user with `lang='it'` triggers Italian translation for that hour's log, regardless of whether they are paid, on credit, or free. Rationale: