read.markets/app/routers/chat.py
Giorgio Gilestro f9534f7ad6 review: gate strategic-log, portfolio, chat, and digest on reviewer
Extends the reviewer agent — previously only protecting indicator
summaries — to every AI-generated surface that reaches a user. The
reviewer's prompt already rejects scratchpad, truncation,
meta-commentary, and (since a6e476b) financial advice; wiring it in
turns those rules from prompt-level "asks" into structural gates.

Four call sites updated:

- ai_log_job.run() : after each tone/analysis variant is generated,
  pass through review_read. On reject, log the reason and skip the
  StrategicLog insert; the API's existing "latest StrategicLog" lookup
  falls back to the previous clean log.

- services/portfolio_analysis.analyse() : on reject, raise a clean
  RuntimeError that the /api/analyze router already maps to HTTP 502
  with a retry-able message. Portfolio analysis isn't cached server-
  side, so the user retries; the reviewer's verdict reason goes into
  the AICall ledger as the leaked-status row's error column.

- routers/chat.chat() : on reject, instead of returning the raw
  assistant content we return a short refusal explaining the limit
  and inviting a rephrase. Adds ~1-2 s of latency per turn (one extra
  LLM call to Haiku) — the only user-facing latency tax.

- jobs/email_digest_job._generate_variants() : on reject, the variant
  is dropped for the cycle. Recipients on the rejected tone get no
  digest email this run, which is better than delivering inbox copy
  that drifts into advice (emails are unrecallable once sent).

In every case the AICall ledger row records the reviewer cost so
month_spend stays accurate across all paths.

The reviewer system prompt is slightly generalised to cover both the
indicator-summary case and the longer-form log/digest/chat case:
- removes "short interpretive read" framing
- softens the "any question" rule so genuine rhetorical structure in
  a long-form log doesn't trigger a reject

tests/conftest.py grows an autouse fixture that stubs review_read to
clean=True in every consumer module. Tests that mock the generator
shouldn't have to also mock the safety gate behind it; tests that
specifically want the reject branch can override with their own
monkeypatch. test_output_review.py is unaffected — it imports
review_read directly.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-29 14:40:04 +02:00

239 lines
8.8 KiB
Python

"""Chat endpoint — POST /api/chat.
Grounded on the latest strategic log, current market quotes, and
thesis-filtered headlines. Ephemeral: the conversation lives in the
client; this endpoint just records each call's cost in `ai_calls`.
"""
from __future__ import annotations
from collections import defaultdict
from datetime import timedelta
import httpx
from fastapi import APIRouter, Depends, HTTPException
from pydantic import BaseModel, Field
from sqlalchemy import desc, func, select
from sqlalchemy.ext.asyncio import AsyncSession
from app.auth import require_token, maybe_current_user, CurrentUser
from app.config import get_settings
from app.db import get_session, utcnow
from app.jobs._market_context import REFERENCE_LINE
from app.models import AICall, Headline, Quote, StrategicLog
from app.routers.api import _md_to_html
from app.services.i18n import respond_in_clause
from app.services.llm_prompts import build_chat_system_prompt
from app.services.openrouter import call_llm, month_start
from app.services.output_review import review_read
from app.logging import get_logger
log = get_logger("chat")
router = APIRouter(dependencies=[Depends(require_token)])
# ---------------------------------------------------------------------------
# Pydantic models
# ---------------------------------------------------------------------------
class ChatMessage(BaseModel):
role: str = Field(pattern="^(user|assistant)$")
content: str
class ChatRequest(BaseModel):
messages: list[ChatMessage]
# ---------------------------------------------------------------------------
# Private helpers
# ---------------------------------------------------------------------------
THESIS_KEYWORDS_FALLBACK = [
"hormuz", "iran", "opec", "brent", "wti", "crude", "oil",
"china", "taiwan", "yuan", "fed", "inflation", "cpi", "yield",
"gold", "dollar", "yen", "saudi", "russia", "ukraine", "israel",
"nato", "defence", "defense",
]
async def _latest_quotes_by_group_chat(session: AsyncSession) -> dict[str, list[dict]]:
sub = (
select(Quote.group_name, Quote.symbol,
func.max(Quote.fetched_at).label("mx"))
.group_by(Quote.group_name, Quote.symbol)
.subquery()
)
rows = (await session.execute(
select(Quote).join(
sub,
(Quote.group_name == sub.c.group_name)
& (Quote.symbol == sub.c.symbol)
& (Quote.fetched_at == sub.c.mx),
).order_by(Quote.group_name, Quote.symbol)
)).scalars().all()
by_group: dict[str, list[dict]] = defaultdict(list)
for q in rows:
by_group[q.group_name].append({
"symbol": q.symbol, "label": q.label,
"price": q.price, "currency": q.currency,
"as_of": q.as_of, "changes": q.changes,
})
return by_group
async def _thesis_headlines_for_chat(session: AsyncSession, limit: int = 50) -> list[dict]:
cutoff = utcnow() - timedelta(hours=24)
rows = (await session.execute(
select(Headline)
.where(Headline.published_at >= cutoff)
.order_by(desc(Headline.published_at))
.limit(300)
)).scalars().all()
out = []
for h in rows:
if any(kw in h.title.lower() for kw in THESIS_KEYWORDS_FALLBACK):
out.append({"source": h.source, "title": h.title})
if len(out) >= limit:
break
return out
async def _month_spend(session: AsyncSession) -> float:
total = (await session.execute(
select(func.coalesce(func.sum(AICall.cost_usd), 0.0))
.where(AICall.called_at >= month_start())
)).scalar()
return float(total or 0.0)
# ---------------------------------------------------------------------------
# Route
# ---------------------------------------------------------------------------
@router.post("/chat")
async def chat(
body: ChatRequest,
session: AsyncSession = Depends(get_session),
principal: CurrentUser | None = Depends(maybe_current_user),
):
"""Answer one user turn given the conversation so far. Grounded on the
latest strategic log + market data + thesis-filtered headlines.
Ephemeral — the conversation lives entirely in the client; the endpoint
just records each call's cost in `ai_calls`."""
# Paid-only feature. Free users get the static log but not the
# interactive chat (see /pricing).
from app.services.access import is_paid_active
if not is_paid_active(principal):
raise HTTPException(
status_code=402,
detail={"code": "paid_required",
"message": "Follow-up chat is a paid-tier feature."},
)
s = get_settings()
if not s.OPENROUTER_API_KEY:
raise HTTPException(status_code=503, detail="OPENROUTER_API_KEY not set")
# Monthly cost cap — same one the log job respects.
spent = await _month_spend(session)
if spent >= s.OPENROUTER_MONTHLY_CAP_USD:
raise HTTPException(
status_code=429,
detail=f"Monthly OpenRouter cap reached (${spent:.2f})",
)
# Trim runaway conversations: keep last 20 turns.
history = body.messages[-20:]
if not history or history[-1].role != "user":
raise HTTPException(status_code=400, detail="Last message must be user")
# Gather grounding context.
log_row = (await session.execute(
select(StrategicLog).order_by(desc(StrategicLog.generated_at)).limit(1)
)).scalar_one_or_none()
quotes = await _latest_quotes_by_group_chat(session)
headlines = await _thesis_headlines_for_chat(session)
system_prompt = build_chat_system_prompt(
s.CASSANDRA_TONE, s.CASSANDRA_ANALYSIS,
log_content=log_row.content if log_row else None,
log_generated_at=log_row.generated_at if log_row else None,
quotes_by_group=quotes,
headlines=headlines,
reference_line=REFERENCE_LINE,
)
# Respect the user's interface language preference: append a single
# localized "respond in" nudge so the assistant answers in IT when
# the user has lang=it. The prompt + history (which includes the
# user's own question, often in their language) are usually enough,
# but the nudge guarantees the first reply lands correctly.
user_lang = principal.user.lang if principal and principal.user else "en"
system_prompt = system_prompt + respond_in_clause(user_lang)
msgs = [{"role": "system", "content": system_prompt}]
for m in history:
msgs.append({"role": m.role, "content": m.content})
try:
async with httpx.AsyncClient(follow_redirects=True) as client:
result = await call_llm(client, msgs)
# Reviewer gate. The chat turn could solicit advice with a
# leading question; the generator's system prompt forbids it,
# but the reviewer is the enforcement layer. ~1-2 s extra
# latency per turn on top of the generation call.
verdict = await review_read(client, result.content)
except Exception as e:
session.add(AICall(
model=s.OPENROUTER_MODEL, status="error", error=str(e)[:500],
))
await session.commit()
raise HTTPException(status_code=502, detail=f"OpenRouter error: {e}")
full_cost = (result.cost_usd or 0.0) + (verdict.cost_usd or 0.0)
if not verdict.clean:
# Rejected reply. Record the cost and surface a generic refusal
# the user can retry, rather than letting potentially non-compliant
# text reach them.
session.add(AICall(
model=result.model,
prompt_tokens=result.prompt_tokens,
completion_tokens=result.completion_tokens,
cost_usd=full_cost, status="leaked",
error=f"reviewer: {verdict.reason}",
))
await session.commit()
log.warning("chat.reviewer_rejected", reason=verdict.reason,
preview=result.content[:120])
refusal = (
"I can't generate that reply — it would have crossed into "
"investment advice or specific recommendations, which I'm "
"not licensed to give. Try rephrasing as a question about "
"what the data means rather than what to do."
)
return {
"role": "assistant",
"content": refusal,
"content_html": _md_to_html(refusal),
"prompt_tokens": result.prompt_tokens,
"completion_tokens": result.completion_tokens,
}
session.add(AICall(
model=result.model,
prompt_tokens=result.prompt_tokens,
completion_tokens=result.completion_tokens,
cost_usd=full_cost,
status="ok",
))
await session.commit()
return {
"role": "assistant",
"content": result.content,
"content_html": _md_to_html(result.content),
"prompt_tokens": result.prompt_tokens,
"completion_tokens": result.completion_tokens,
}