i18n: stop truncating IT translations + localise the chat sidebar

Three connected fixes after the user spotted the 2026-05-28 IT log cutting off mid-sentence: 1. translation: bump max_tokens 4000 → 8000. call_llm()'s default cap was 4000, which is what the English log generator itself uses as its ceiling. Italian expands roughly 15-25 % over English in tokens, so any near-cap English source produced an IT translation that hit finish_reason=length and returned a truncated body — silently, because _call_provider() only raises when content is fully empty. The strategic_log_translations table has dozens of rows where completion_tokens landed at exactly 4000 with content well under half the source length. 8000 gives ample headroom for any of the five LANGUAGES we ship (en/it/es/fr/de). 2. log.html: localise the chat sidebar strings. user_lang was already passed into the template by pages.py, so an inline {% if user_lang == 'it' %} keeps it simple. Covers the "Ask Cassandra" title, the "grounded on…" hint, the helper lede, the textarea placeholder, and the Send button label. 3. chat endpoint: append respond_in_clause(user.lang) to the system prompt. The chat conversation can now happen in IT — the model's first reply lands in the right language even when the user's first turn is short. scripts/backfill_truncated_translations.py: one-off cleanup utility. Scans strategic_log_translations for rows whose translated content is < 70 % of the English source (the truncation signal — IT *expands* beyond English, so a shorter translation is always suspect), deletes them, and re-translates via the now-uncapped service. Supports --date, --since, --all and --dry-run. The 2026-05-28 fan-out has already been re-translated (13/13 rows). Other historical dates still hold older truncations; the user can decide whether to backfill those (the script is idempotent). Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-29 11:44:41 +02:00 · 2026-05-29 11:44:41 +02:00 · 48f022b71b
commit 48f022b71b
parent 3e1a14f334
4 changed files with 180 additions and 5 deletions
--- a/app/routers/chat.py
+++ b/app/routers/chat.py
@ -21,6 +21,7 @@ from app.db import get_session, utcnow
 from app.jobs._market_context import REFERENCE_LINE
 from app.models import AICall, Headline, Quote, StrategicLog
 from app.routers.api import _md_to_html
 from app.services.i18n import respond_in_clause
 from app.services.llm_prompts import build_chat_system_prompt
 from app.services.openrouter import call_llm, month_start
@ -160,6 +161,13 @@ async def chat(
        headlines=headlines,
        reference_line=REFERENCE_LINE,
    )
    # Respect the user's interface language preference: append a single
    # localized "respond in" nudge so the assistant answers in IT when
    # the user has lang=it. The prompt + history (which includes the
    # user's own question, often in their language) are usually enough,
    # but the nudge guarantees the first reply lands correctly.
    user_lang = principal.user.lang if principal and principal.user else "en"
    system_prompt = system_prompt + respond_in_clause(user_lang)
    msgs = [{"role": "system", "content": system_prompt}]
    for m in history:
--- a/app/services/translation.py
+++ b/app/services/translation.py
@ -65,7 +65,13 @@ async def translate(
        {"role": "system", "content": system_prompt},
        {"role": "user",   "content": text},
    ]
-    result = await call_llm(client, messages)
+    # Italian / Spanish / French / German typically expand the token count
    # 15-25 % over English (longer words, more sub-word splits). Our
    # strategic-log generator runs up to its own 4000-token cap, so a 4000
    # cap here would silently truncate any near-cap source. 8000 gives
    # ample headroom for every language we currently support and costs
    # nothing extra unless the model actually emits more tokens.
    result = await call_llm(client, messages, max_tokens=8000)
    content = (result.content or "").strip()
    # Strip code fences if the model wrapped its output despite the system rule.
--- a/app/templates/log.html
+++ b/app/templates/log.html
@ -33,21 +33,28 @@
    {% if paid %}
    <aside id="chat-sidebar" class="log-page__chat">
      <div class="chat-header">
-        <span class="chat-title">Ask Cassandra</span>
+        <span class="chat-title">{% if user_lang == 'it' %}Chiedi a Cassandra{% else %}Ask Cassandra{% endif %}</span>
-        <span class="chat-hint">grounded on the latest log + live data</span>
+        <span class="chat-hint">{% if user_lang == 'it' %}basato sull'ultimo log + dati in tempo reale{% else %}grounded on the latest log + live data{% endif %}</span>
      </div>
      <div id="chat-thread" class="chat-thread">
        <div class="chat-msg chat-msg--system">
          {% if user_lang == 'it' %}
          Fai domande sull'analisi di oggi. Il modello vede l'ultimo log
          strategico, le quotazioni di mercato in tempo reale per tutti i
          gruppi e le ultime 24h di titoli filtrati per tesi. Un refresh
          della pagina cancella questa conversazione.
          {% else %}
          Ask about today's analysis. The model sees the latest strategic log,
          live market readings across all groups, and the last 24h of
          thesis-filtered headlines. Refresh wipes this conversation.
          {% endif %}
        </div>
      </div>
      <form id="chat-form" class="chat-form" autocomplete="off">
        <textarea id="chat-input" rows="2"
-                  placeholder="e.g. why is the defence sleeve flat through Hormuz?"
+                  placeholder="{% if user_lang == 'it' %}es. perché il comparto difesa è piatto nonostante Hormuz?{% else %}e.g. why is the defence sleeve flat through Hormuz?{% endif %}"
                  required></textarea>
-        <button id="chat-send" type="submit">Send</button>
+        <button id="chat-send" type="submit">{% if user_lang == 'it' %}Invia{% else %}Send{% endif %}</button>
      </form>
    </aside>
    {% else %}
--- a/scripts/backfill_truncated_translations.py
+++ b/scripts/backfill_truncated_translations.py
@ -0,0 +1,154 @@
 """One-off backfill: re-translate StrategicLog rows whose Italian (or
 other-language) translation was truncated by the old 4000-token cap in
 services/translation.py.
 Selection criterion for a "truncated" row:
 - the translated content is shorter than 70 % of the English source
   (SHORTNESS_RATIO). completion_tokens is printed for reference only; it
   is not used to select rows.
 Usage inside the app container:
    docker compose exec app python -m scripts.backfill_truncated_translations \
        --date 2026-05-28               # restrict to one day, repeatable
    docker compose exec app python -m scripts.backfill_truncated_translations \
        --since 2026-04-01              # everything from a date onward
    docker compose exec app python -m scripts.backfill_truncated_translations \
        --all                            # entire history (slow / costs $$)
    docker compose exec app python -m scripts.backfill_truncated_translations \
        --date 2026-05-28 --dry-run     # just print what would be touched
 Idempotent: each affected row is deleted then re-inserted in its own
 transaction, so a re-run only re-translates rows that are STILL flagged
 truncated after the previous pass.
 """
 from __future__ import annotations
 import argparse
 import asyncio
 import sys
 from datetime import date, datetime
 import httpx
 from sqlalchemy import and_, delete, func, or_, select
 from app.db import get_session_factory
 from app.logging import get_logger
 from app.models import StrategicLog, StrategicLogTranslation
 from app.services.translation import translate
 log = get_logger("backfill.translations")
 # Italian (and the other expansive Romance / Germanic targets we support)
 # typically produce 15-25 % MORE characters than the English source, so
 # a translation shorter than the source — let alone much shorter — is a
 # truncation signal even if completion_tokens didn't land exactly at the
 # old 4000-token cap. We tolerate down to 70 % of source length to avoid
 # touching the occasional legitimately-compressed translation.
 SHORTNESS_RATIO = 0.7
 def _is_truncated(en_chars: int, tr_chars: int, tr_completion: int | None) -> bool:
    if en_chars <= 0:
        return False
    return tr_chars < en_chars * SHORTNESS_RATIO
 async def _find_targets(session, day: date | None, since: date | None, all_: bool):
    """Return the (log, translation) rows whose translation looks truncated.

    Joins every StrategicLogTranslation to its StrategicLog, optionally
    narrows by date, and keeps only the rows that ``_is_truncated`` flags.

    Args:
        session: Session used to execute the query (awaited).
        day: When given, keep only logs whose ``generated_at`` date equals it.
        since: Used only when ``day`` is None; keeps logs generated on or
            after it.
        all_: Not consulted here; with neither ``day`` nor ``since`` no date
            filter is applied.

    Returns:
        A list of result rows exposing ``log_id``, ``generated_at``,
        ``en_chars``, ``tr_id``, ``lang``, ``tr_tok`` and ``tr_chars``,
        ordered by ``generated_at`` then ``lang``.
    """
    columns = (
        StrategicLog.id.label("log_id"),
        StrategicLog.generated_at,
        func.char_length(StrategicLog.content).label("en_chars"),
        StrategicLogTranslation.id.label("tr_id"),
        StrategicLogTranslation.lang,
        StrategicLogTranslation.completion_tokens.label("tr_tok"),
        func.char_length(StrategicLogTranslation.content).label("tr_chars"),
    )
    stmt = select(*columns).join(
        StrategicLogTranslation,
        StrategicLogTranslation.log_id == StrategicLog.id,
    )

    # A single-day filter takes precedence over the open-ended "since" one.
    if day is not None:
        stmt = stmt.where(func.date(StrategicLog.generated_at) == day)
    elif since is not None:
        stmt = stmt.where(StrategicLog.generated_at >= since)

    stmt = stmt.order_by(StrategicLog.generated_at, StrategicLogTranslation.lang)
    result = await session.execute(stmt)
    candidates = result.all()
    # The length comparison is done in Python on the fetched rows.
    return [
        row for row in candidates
        if _is_truncated(row.en_chars, row.tr_chars, row.tr_tok)
    ]
 async def _retranslate_one(session, client: httpx.AsyncClient, log_id: int, lang: str):
    """Replace the (log_id, lang) translation row with a fresh translation.

    The new translation is obtained BEFORE the existing row is touched, and
    the delete + insert are committed together. A failed translation
    therefore leaves the old row in place, so it is still selected on a
    later run instead of being lost. Each call commits on its own, so one
    row's failure does not affect rows already rewritten.

    Args:
        session: Session used for the lookup, delete and insert (awaited).
        client: HTTP client handed to ``translate``.
        log_id: Id of the StrategicLog whose translation is rewritten.
        lang: Language code of the translation row to replace.

    Returns:
        ``True`` when the row was replaced; ``False`` when the source log
        does not exist or ``translate`` raised (nothing is modified then).
    """
    src_row = (await session.execute(
        select(StrategicLog).where(StrategicLog.id == log_id)
    )).scalar_one_or_none()
    if src_row is None:
        log.warning("backfill.missing_source", log_id=log_id)
        return False

    # Read the source text while the row is freshly loaded, before any commit.
    source_md = src_row.content

    try:
        translated_md, llm_result = await translate(client, source_md, lang)
    except Exception as exc:
        # Best-effort per row: report and move on. Nothing has been deleted
        # yet, so the existing translation stays available.
        log.warning("backfill.translate_failed",
                    log_id=log_id, lang=lang, error=str(exc)[:200])
        return False

    # Swap old for new in a single transaction: the delete statement runs
    # now, the insert is flushed by the commit below.
    await session.execute(
        delete(StrategicLogTranslation)
        .where(StrategicLogTranslation.log_id == log_id)
        .where(StrategicLogTranslation.lang == lang)
    )
    session.add(StrategicLogTranslation(
        log_id=log_id,
        lang=lang,
        content=translated_md,
        model=llm_result.model,
        prompt_tokens=llm_result.prompt_tokens,
        completion_tokens=llm_result.completion_tokens,
        cost_usd=llm_result.cost_usd,
    ))
    await session.commit()
    return True
 async def main(args):
    """Run the backfill for the parsed command-line arguments.

    Lists every translation row flagged as truncated for the requested
    scope and, unless ``args.dry_run`` is set, re-translates each one and
    prints a per-row result followed by a summary. Exits with status 2 when
    none of ``--date``, ``--since`` or ``--all`` was supplied.
    """
    def _parse_day(raw):
        # An absent or empty option maps to None; otherwise strict YYYY-MM-DD.
        return datetime.strptime(raw, "%Y-%m-%d").date() if raw else None

    day = _parse_day(args.date)
    since = _parse_day(args.since)
    if not day and not since and not args.all:
        print("Specify --date, --since, or --all", file=sys.stderr)
        sys.exit(2)

    make_session = get_session_factory()
    async with make_session() as session:
        targets = await _find_targets(session, day, since, args.all)
        total = len(targets)
        print(f"Found {total} truncated translation row(s):")
        for row in targets:
            summary = (
                f"  log_id={row.log_id} lang={row.lang} "
                f"en={row.en_chars}c tr={row.tr_chars}c "
                f"tok={row.tr_tok} at {row.generated_at}"
            )
            print(summary)

        # Nothing to rewrite, or the caller only asked for the listing.
        if args.dry_run or not targets:
            return

        succeeded = 0
        async with httpx.AsyncClient(follow_redirects=True) as client:
            for row in targets:
                print(f"  re-translating log_id={row.log_id} lang={row.lang}…", end=" ")
                done = await _retranslate_one(session, client, row.log_id, row.lang)
                print("OK" if done else "FAILED")
                succeeded += 1 if done else 0
        print(f"\nRe-translated {succeeded}/{total} row(s).")
 if __name__ == "__main__":
    # Command-line entry point: build the parser, then hand the parsed
    # namespace to the async main().
    parser = argparse.ArgumentParser()
    # At most one scope selector may be given; main() rejects the case
    # where none is supplied.
    scope = parser.add_mutually_exclusive_group()
    scope.add_argument("--date", help="single day YYYY-MM-DD")
    scope.add_argument("--since", help="from YYYY-MM-DD onward")
    scope.add_argument("--all", action="store_true", help="entire history")
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="list affected rows without rewriting",
    )
    cli_args = parser.parse_args()
    asyncio.run(main(cli_args))