read.markets/app/routers/chat.py
Giorgio Gilestro 48f022b71b i18n: stop truncating IT translations + localise the chat sidebar
Three connected fixes after the user spotted the 2026-05-28 IT log
cutting off mid-sentence:

1. translation: bump max_tokens 4000 → 8000.
   call_llm()'s default cap was 4000, which is what the English log
   generator itself uses as its ceiling. Italian expands roughly 15-25 %
   over English in tokens, so any near-cap English source produced an
   IT translation that hit finish_reason=length and returned a
   truncated body — silently, because _call_provider() only raises when
   content is fully empty. The strategic_log_translations table has
   dozens of rows where completion_tokens landed at exactly 4000 with
   content well under half the source length. 8000 gives ample
   headroom for any of the five LANGUAGES we ship (en/it/es/fr/de).

2. log.html: localise the chat sidebar strings.
   user_lang was already passed into the template by pages.py, so an
   inline {% if user_lang == 'it' %} keeps it simple. Covers the
   "Ask Cassandra" title, the "grounded on…" hint, the helper lede,
   the textarea placeholder, and the Send button label.

3. chat endpoint: append respond_in_clause(user.lang) to the system
   prompt. The chat conversation can now happen in IT — the model's
   first reply lands in the right language even when the user's first
   turn is short.

scripts/backfill_truncated_translations.py: one-off cleanup utility.
Scans strategic_log_translations for rows whose translated content is
< 70 % of the English source (the truncation signal — IT *expands*
beyond English, so a shorter translation is always suspect), deletes
them, and re-translates via the now-uncapped service. Supports --date,
--since, --all and --dry-run. The 2026-05-28 fan-out has already been
re-translated (13/13 rows). Other historical dates still hold older
truncations; the user can decide whether to backfill those (the script
is idempotent).

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-29 11:44:41 +02:00

201 lines
7.1 KiB
Python

"""Chat endpoint — POST /api/chat.
Grounded on the latest strategic log, current market quotes, and
thesis-filtered headlines. Ephemeral: the conversation lives in the
client; this endpoint just records each call's cost in `ai_calls`.
"""
from __future__ import annotations
from collections import defaultdict
from datetime import timedelta
import httpx
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel, Field
from sqlalchemy import desc, func, select
from sqlalchemy.ext.asyncio import AsyncSession
from app.auth import require_token, maybe_current_user, CurrentUser
from app.config import get_settings
from app.db import get_session, utcnow
from app.jobs._market_context import REFERENCE_LINE
from app.models import AICall, Headline, Quote, StrategicLog
from app.routers.api import _md_to_html
from app.services.i18n import respond_in_clause
from app.services.llm_prompts import build_chat_system_prompt
from app.services.openrouter import call_llm, month_start
router = APIRouter(dependencies=[Depends(require_token)])
# ---------------------------------------------------------------------------
# Pydantic models
# ---------------------------------------------------------------------------
class ChatMessage(BaseModel):
role: str = Field(pattern="^(user|assistant)$")
content: str
class ChatRequest(BaseModel):
messages: list[ChatMessage]
# ---------------------------------------------------------------------------
# Private helpers
# ---------------------------------------------------------------------------
THESIS_KEYWORDS_FALLBACK = [
"hormuz", "iran", "opec", "brent", "wti", "crude", "oil",
"china", "taiwan", "yuan", "fed", "inflation", "cpi", "yield",
"gold", "dollar", "yen", "saudi", "russia", "ukraine", "israel",
"nato", "defence", "defense",
]
async def _latest_quotes_by_group_chat(session: AsyncSession) -> dict[str, list[dict]]:
sub = (
select(Quote.group_name, Quote.symbol,
func.max(Quote.fetched_at).label("mx"))
.group_by(Quote.group_name, Quote.symbol)
.subquery()
)
rows = (await session.execute(
select(Quote).join(
sub,
(Quote.group_name == sub.c.group_name)
& (Quote.symbol == sub.c.symbol)
& (Quote.fetched_at == sub.c.mx),
).order_by(Quote.group_name, Quote.symbol)
)).scalars().all()
by_group: dict[str, list[dict]] = defaultdict(list)
for q in rows:
by_group[q.group_name].append({
"symbol": q.symbol, "label": q.label,
"price": q.price, "currency": q.currency,
"as_of": q.as_of, "changes": q.changes,
})
return by_group
async def _thesis_headlines_for_chat(session: AsyncSession, limit: int = 50) -> list[dict]:
cutoff = utcnow() - timedelta(hours=24)
rows = (await session.execute(
select(Headline)
.where(Headline.published_at >= cutoff)
.order_by(desc(Headline.published_at))
.limit(300)
)).scalars().all()
out = []
for h in rows:
if any(kw in h.title.lower() for kw in THESIS_KEYWORDS_FALLBACK):
out.append({"source": h.source, "title": h.title})
if len(out) >= limit:
break
return out
async def _month_spend(session: AsyncSession) -> float:
total = (await session.execute(
select(func.coalesce(func.sum(AICall.cost_usd), 0.0))
.where(AICall.called_at >= month_start())
)).scalar()
return float(total or 0.0)
# ---------------------------------------------------------------------------
# Route
# ---------------------------------------------------------------------------
@router.post("/chat")
async def chat(
body: ChatRequest,
session: AsyncSession = Depends(get_session),
principal: CurrentUser | None = Depends(maybe_current_user),
):
"""Answer one user turn given the conversation so far. Grounded on the
latest strategic log + market data + thesis-filtered headlines.
Ephemeral — the conversation lives entirely in the client; the endpoint
just records each call's cost in `ai_calls`."""
# Paid-only feature. Free users get the static log but not the
# interactive chat (see /pricing).
from app.services.access import is_paid_active
if not is_paid_active(principal):
raise HTTPException(
status_code=402,
detail={"code": "paid_required",
"message": "Follow-up chat is a paid-tier feature."},
)
s = get_settings()
if not s.OPENROUTER_API_KEY:
raise HTTPException(status_code=503, detail="OPENROUTER_API_KEY not set")
# Monthly cost cap — same one the log job respects.
spent = await _month_spend(session)
if spent >= s.OPENROUTER_MONTHLY_CAP_USD:
raise HTTPException(
status_code=429,
detail=f"Monthly OpenRouter cap reached (${spent:.2f})",
)
# Trim runaway conversations: keep last 20 turns.
history = body.messages[-20:]
if not history or history[-1].role != "user":
raise HTTPException(status_code=400, detail="Last message must be user")
# Gather grounding context.
log_row = (await session.execute(
select(StrategicLog).order_by(desc(StrategicLog.generated_at)).limit(1)
)).scalar_one_or_none()
quotes = await _latest_quotes_by_group_chat(session)
headlines = await _thesis_headlines_for_chat(session)
system_prompt = build_chat_system_prompt(
s.CASSANDRA_TONE, s.CASSANDRA_ANALYSIS,
log_content=log_row.content if log_row else None,
log_generated_at=log_row.generated_at if log_row else None,
quotes_by_group=quotes,
headlines=headlines,
reference_line=REFERENCE_LINE,
)
# Respect the user's interface language preference: append a single
# localized "respond in" nudge so the assistant answers in IT when
# the user has lang=it. The prompt + history (which includes the
# user's own question, often in their language) are usually enough,
# but the nudge guarantees the first reply lands correctly.
user_lang = principal.user.lang if principal and principal.user else "en"
system_prompt = system_prompt + respond_in_clause(user_lang)
msgs = [{"role": "system", "content": system_prompt}]
for m in history:
msgs.append({"role": m.role, "content": m.content})
try:
async with httpx.AsyncClient(follow_redirects=True) as client:
result = await call_llm(client, msgs)
except Exception as e:
session.add(AICall(
model=s.OPENROUTER_MODEL, status="error", error=str(e)[:500],
))
await session.commit()
raise HTTPException(status_code=502, detail=f"OpenRouter error: {e}")
session.add(AICall(
model=result.model,
prompt_tokens=result.prompt_tokens,
completion_tokens=result.completion_tokens,
cost_usd=result.cost_usd,
status="ok",
))
await session.commit()
return {
"role": "assistant",
"content": result.content,
"content_html": _md_to_html(result.content),
"prompt_tokens": result.prompt_tokens,
"completion_tokens": result.completion_tokens,
}