i18n: stop truncating IT translations + localise the chat sidebar

Three connected fixes after the user spotted the 2026-05-28 IT log cutting off mid-sentence: 1. translation: bump max_tokens 4000 → 8000. call_llm()'s default cap was 4000, which is what the English log generator itself uses as its ceiling. Italian expands roughly 15-25 % over English in tokens, so any near-cap English source produced an IT translation that hit finish_reason=length and returned a truncated body — silently, because _call_provider() only raises when content is fully empty. The strategic_log_translations table has dozens of rows where completion_tokens landed at exactly 4000 with content well under half the source length. 8000 gives ample headroom for any of the five LANGUAGES we ship (en/it/es/fr/de). 2. log.html: localise the chat sidebar strings. user_lang was already passed into the template by pages.py, so an inline {% if user_lang == 'it' %} keeps it simple. Covers the "Ask Cassandra" title, the "grounded on…" hint, the helper lede, the textarea placeholder, and the Send button label. 3. chat endpoint: append respond_in_clause(user.lang) to the system prompt. The chat conversation can now happen in IT — the model's first reply lands in the right language even when the user's first turn is short. scripts/backfill_truncated_translations.py: one-off cleanup utility. Scans strategic_log_translations for rows whose translated content is < 70 % of the English source (the truncation signal — IT *expands* beyond English, so a shorter translation is always suspect), deletes them, and re-translates via the now-uncapped service. Supports --date, --since, --all and --dry-run. The 2026-05-28 fan-out has already been re-translated (13/13 rows). Other historical dates still hold older truncations; the user can decide whether to backfill those (the script is idempotent). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-29 11:44:41 +02:00 · 2026-05-29 11:44:41 +02:00 · 48f022b71b
commit 48f022b71b
parent 3e1a14f334
4 changed files with 180 additions and 5 deletions
--- a/app/routers/chat.py
+++ b/app/routers/chat.py
@ -21,6 +21,7 @@ from app.db import get_session, utcnow
 from app.jobs._market_context import REFERENCE_LINE
 from app.models import AICall, Headline, Quote, StrategicLog
 from app.routers.api import _md_to_html
+from app.services.i18n import respond_in_clause
 from app.services.llm_prompts import build_chat_system_prompt
 from app.services.openrouter import call_llm, month_start

@ -160,6 +161,13 @@ async def chat(
        headlines=headlines,
        reference_line=REFERENCE_LINE,
    )
+    # Respect the user's interface language preference: append a single
+    # localized "respond in" nudge so the assistant answers in IT when
+    # the user has lang=it. The prompt + history (which includes the
+    # user's own question, often in their language) are usually enough,
+    # but the nudge guarantees the first reply lands correctly.
+    user_lang = principal.user.lang if principal and principal.user else "en"
+    system_prompt = system_prompt + respond_in_clause(user_lang)

    msgs = [{"role": "system", "content": system_prompt}]
    for m in history:
--- a/app/services/translation.py
+++ b/app/services/translation.py
@ -65,7 +65,13 @@ async def translate(
        {"role": "system", "content": system_prompt},
        {"role": "user",   "content": text},
    ]
-    result = await call_llm(client, messages)
+    # Italian / Spanish / French / German typically expand the token count
+    # 15-25 % over English (longer words, more sub-word splits). Our
+    # strategic-log generator runs up to its own 4000-token cap, so a 4000
+    # cap here would silently truncate any near-cap source. 8000 gives
+    # ample headroom for every language we currently support and costs
+    # nothing extra unless the model actually emits more tokens.
+    result = await call_llm(client, messages, max_tokens=8000)

    content = (result.content or "").strip()
    # Strip code fences if the model wrapped its output despite the system rule.
--- a/app/templates/log.html
+++ b/app/templates/log.html
@ -33,21 +33,28 @@
    {% if paid %}
    <aside id="chat-sidebar" class="log-page__chat">
      <div class="chat-header">
-        <span class="chat-title">Ask Cassandra</span>
-        <span class="chat-hint">grounded on the latest log + live data</span>
+        <span class="chat-title">{% if user_lang == 'it' %}Chiedi a Cassandra{% else %}Ask Cassandra{% endif %}</span>
+        <span class="chat-hint">{% if user_lang == 'it' %}basato sull'ultimo log + dati in tempo reale{% else %}grounded on the latest log + live data{% endif %}</span>
      </div>
      <div id="chat-thread" class="chat-thread">
        <div class="chat-msg chat-msg--system">
+          {% if user_lang == 'it' %}
+          Fai domande sull'analisi di oggi. Il modello vede l'ultimo log
+          strategico, le quotazioni di mercato in tempo reale per tutti i
+          gruppi e le ultime 24h di titoli filtrati per tesi. Un refresh
+          della pagina cancella questa conversazione.
+          {% else %}
          Ask about today's analysis. The model sees the latest strategic log,
          live market readings across all groups, and the last 24h of
          thesis-filtered headlines. Refresh wipes this conversation.
+          {% endif %}
        </div>
      </div>
      <form id="chat-form" class="chat-form" autocomplete="off">
        <textarea id="chat-input" rows="2"
-                  placeholder="e.g. why is the defence sleeve flat through Hormuz?"
+                  placeholder="{% if user_lang == 'it' %}es. perché il comparto difesa è piatto nonostante Hormuz?{% else %}e.g. why is the defence sleeve flat through Hormuz?{% endif %}"
                  required></textarea>
-        <button id="chat-send" type="submit">Send</button>
+        <button id="chat-send" type="submit">{% if user_lang == 'it' %}Invia{% else %}Send{% endif %}</button>
      </form>
    </aside>
    {% else %}
--- a/scripts/backfill_truncated_translations.py
+++ b/scripts/backfill_truncated_translations.py
@ -0,0 +1,154 @@
+"""One-off backfill: re-translate StrategicLog rows whose Italian (or
+other-language) translation was truncated by the old 4000-token cap in
+services/translation.py.
+
+Selection criterion for a "truncated" row:
+- the translated content is shorter than 70 % of the English source
+  (SHORTNESS_RATIO); completion_tokens is only reported, not used to select
+
+Usage inside the app container:
+    docker compose exec app python -m scripts.backfill_truncated_translations \
+        --date 2026-05-28               # restrict to one day, repeatable
+    docker compose exec app python -m scripts.backfill_truncated_translations \
+        --since 2026-04-01              # everything from a date onward
+    docker compose exec app python -m scripts.backfill_truncated_translations \
+        --all                            # entire history (slow / costs $$)
+    docker compose exec app python -m scripts.backfill_truncated_translations \
+        --date 2026-05-28 --dry-run     # just print what would be touched
+
+Idempotent: each affected row is deleted then re-inserted in its own
+transaction, so a re-run only re-translates rows that are STILL flagged
+truncated after the previous pass.
+"""
+from __future__ import annotations
+
+import argparse
+import asyncio
+import sys
+from datetime import date, datetime
+
+import httpx
+from sqlalchemy import and_, delete, func, or_, select
+
+from app.db import get_session_factory
+from app.logging import get_logger
+from app.models import StrategicLog, StrategicLogTranslation
+from app.services.translation import translate
+
+log = get_logger("backfill.translations")
+
+# Italian (and the other expansive Romance / Germanic targets we support)
+# typically produce 15-25 % MORE characters than the English source, so
+# a translation shorter than the source — let alone much shorter — is a
+# truncation signal even if completion_tokens didn't land exactly at the
+# old 4000-token cap. We tolerate down to 70 % of source length to avoid
+# touching the occasional legitimately-compressed translation.
+SHORTNESS_RATIO = 0.7
+
+
+def _is_truncated(en_chars: int, tr_chars: int, tr_completion: int | None) -> bool:
+    """Report whether a translation is short enough to count as truncated.
+
+    A row is flagged when the English source is non-empty and the
+    translation has fewer than SHORTNESS_RATIO times as many characters.
+    ``tr_completion`` is accepted but does not take part in the decision.
+    """
+    has_source = en_chars > 0
+    return has_source and tr_chars < en_chars * SHORTNESS_RATIO
+
+
+async def _find_targets(session, day: date | None, since: date | None, all_: bool):
+    """Return the joined log/translation rows that look truncated.
+
+    Every StrategicLog is joined to its translations, optionally limited
+    to a single day (``day``) or to everything from ``since`` onward;
+    ``day`` takes precedence when both are given. ``all_`` adds no filter
+    of its own. Rows are ordered by generation time, then language, and
+    only those flagged by _is_truncated() are returned.
+    """
+    columns = (
+        StrategicLog.id.label("log_id"),
+        StrategicLog.generated_at,
+        func.char_length(StrategicLog.content).label("en_chars"),
+        StrategicLogTranslation.id.label("tr_id"),
+        StrategicLogTranslation.lang,
+        StrategicLogTranslation.completion_tokens.label("tr_tok"),
+        func.char_length(StrategicLogTranslation.content).label("tr_chars"),
+    )
+    stmt = select(*columns).join(
+        StrategicLogTranslation,
+        StrategicLogTranslation.log_id == StrategicLog.id,
+    )
+
+    # At most one date restriction applies; with neither, the whole
+    # history is scanned.
+    if day is not None:
+        stmt = stmt.where(func.date(StrategicLog.generated_at) == day)
+    elif since is not None:
+        stmt = stmt.where(StrategicLog.generated_at >= since)
+
+    stmt = stmt.order_by(StrategicLog.generated_at, StrategicLogTranslation.lang)
+    result = await session.execute(stmt)
+
+    flagged = []
+    for row in result.all():
+        if _is_truncated(row.en_chars, row.tr_chars, row.tr_tok):
+            flagged.append(row)
+    return flagged
+
+
+async def _retranslate_one(session, client: httpx.AsyncClient, log_id: int, lang: str):
+    """Replace the (log_id, lang) translation row with a fresh translation.
+
+    The new translation is requested BEFORE the old row is touched, and
+    the delete + insert are committed together. A failed translate() call
+    therefore leaves the existing row in place instead of losing it, and
+    each row still commits independently of the others.
+
+    Returns True when a new row was committed, False when the source log
+    is missing or translate() raised.
+    """
+    src_row = (await session.execute(
+        select(StrategicLog).where(StrategicLog.id == log_id)
+    )).scalar_one_or_none()
+    if src_row is None:
+        log.warning("backfill.missing_source", log_id=log_id)
+        return False
+
+    try:
+        translated_md, llm_result = await translate(client, src_row.content, lang)
+    except Exception as exc:
+        # Best-effort per row: log and report failure. The old row is
+        # still present, so a later run will flag and retry it.
+        log.warning("backfill.translate_failed",
+                    log_id=log_id, lang=lang, error=str(exc)[:200])
+        return False
+
+    # Swap old for new in a single transaction so the table never ends up
+    # without a translation for this (log_id, lang) pair.
+    await session.execute(
+        delete(StrategicLogTranslation)
+        .where(StrategicLogTranslation.log_id == log_id)
+        .where(StrategicLogTranslation.lang == lang)
+    )
+    session.add(StrategicLogTranslation(
+        log_id=log_id,
+        lang=lang,
+        content=translated_md,
+        model=llm_result.model,
+        prompt_tokens=llm_result.prompt_tokens,
+        completion_tokens=llm_result.completion_tokens,
+        cost_usd=llm_result.cost_usd,
+    ))
+    await session.commit()
+    return True
+
+
+async def main(args):
+    """Find truncated translations in the requested scope and redo them.
+
+    ``args`` is the parsed CLI namespace (date, since, all, dry_run).
+    Exits with status 2 when no scope flag is given. The flagged rows are
+    always listed; with --dry-run (or nothing flagged) the function stops
+    there, otherwise each row is re-translated and a summary is printed.
+    """
+    def _to_date(raw):
+        # Absent flags stay None; present ones must be YYYY-MM-DD.
+        return datetime.strptime(raw, "%Y-%m-%d").date() if raw else None
+
+    day = _to_date(args.date)
+    since = _to_date(args.since)
+    if day is None and since is None and not args.all:
+        print("Specify --date, --since, or --all", file=sys.stderr)
+        sys.exit(2)
+
+    session_factory = get_session_factory()
+    async with session_factory() as session:
+        targets = await _find_targets(session, day, since, args.all)
+        total = len(targets)
+        print(f"Found {total} truncated translation row(s):")
+        for row in targets:
+            summary = (
+                f"  log_id={row.log_id} lang={row.lang} "
+                f"en={row.en_chars}c tr={row.tr_chars}c "
+                f"tok={row.tr_tok} at {row.generated_at}"
+            )
+            print(summary)
+        if args.dry_run or not targets:
+            return
+
+        outcomes = []
+        async with httpx.AsyncClient(follow_redirects=True) as client:
+            for row in targets:
+                print(f"  re-translating log_id={row.log_id} lang={row.lang}…", end=" ")
+                done = await _retranslate_one(session, client, row.log_id, row.lang)
+                print("OK" if done else "FAILED")
+                outcomes.append(done)
+        ok = sum(1 for done in outcomes if done)
+        print(f"\nRe-translated {ok}/{total} row(s).")
+
+
+if __name__ == "__main__":
+    # CLI: --date / --since / --all are mutually exclusive scope selectors
+    # (main() rejects a call with none of them); --dry-run only lists.
+    parser = argparse.ArgumentParser()
+    scope = parser.add_mutually_exclusive_group()
+    scope.add_argument("--date", help="single day YYYY-MM-DD")
+    scope.add_argument("--since", help="from YYYY-MM-DD onward")
+    scope.add_argument("--all", action="store_true", help="entire history")
+    parser.add_argument("--dry-run", action="store_true",
+                        help="list affected rows without rewriting")
+    cli_args = parser.parse_args()
+    asyncio.run(main(cli_args))