read.markets/app/services/openrouter.py
Giorgio Gilestro 4adc8dfe82 openrouter: split into llm_prompts (prompt engineering) + transport
openrouter.py was 790 lines mixing two orthogonal concerns:
- Prompt engineering (build_system_prompt, build_summary_*,
  build_chat_*, build_daily_digest_*, etc.) — ~400 lines, changes
  weekly as PROMPT_VERSION bumps
- LLM transport (call_llm, _provider_chain, _call_provider, retry
  + fallback machinery) — ~250 lines, rarely changes

Extracted the prompt-engineering surface to app/services/llm_prompts.py.
Transport stays in openrouter.py (consistent with the filename — the
OpenRouter URL is the transport's anchor).

All import sites (jobs, routers, services, tests) split their
multi-import lines into two: prompt-things from llm_prompts, transport
from openrouter. PROMPT_VERSION constant, _TONE_ALIASES, _resolve_tone,
and SYSTEM_PROMPT moved with the prompt functions.

No behaviour change — pure relocation. Function signatures, body, and
naming all preserved.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-27 21:27:23 +02:00

206 lines
7.3 KiB
Python

"""LLM transport layer — OpenRouter / DeepSeek API calls.
Handles provider selection, retry + fallback machinery, and the monthly
budget-cap helpers. Prompt engineering lives in ``app.services.llm_prompts``;
this module only cares about *how* to reach the model, not *what to ask*.
"""
from __future__ import annotations
import json
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
import httpx
from tenacity import retry, retry_if_exception_type, stop_after_attempt, wait_exponential
from app import branding
from app.config import get_settings
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
@dataclass
class LogResult:
content: str
model: str
prompt_tokens: int | None
completion_tokens: int | None
cost_usd: float | None
def _provider_chain() -> list[str]:
"""Ordered list of providers to try: primary, then fallback (unless
the fallback is unset, the same as primary, or has no API key)."""
s = get_settings()
primary = (s.LLM_PROVIDER or "deepseek").lower()
fallback = (s.LLM_FALLBACK or "").lower()
chain = [primary]
if fallback and fallback != primary:
chain.append(fallback)
# Drop providers with no API key configured.
return [p for p in chain if _provider_has_key(p)]
def _provider_has_key(provider: str) -> bool:
s = get_settings()
if provider == "deepseek":
return bool(s.DEEPSEEK_API_KEY)
if provider == "openrouter":
return bool(s.OPENROUTER_API_KEY)
return False
def _endpoint_for(provider: str) -> tuple[str, str, str, dict[str, str]]:
"""Resolve (url, api_key, default_model, extra_headers) for a specific
provider. Raises if its API key isn't set."""
s = get_settings()
if provider == "deepseek":
if not s.DEEPSEEK_API_KEY:
raise RuntimeError("DEEPSEEK_API_KEY not set")
return s.DEEPSEEK_URL, s.DEEPSEEK_API_KEY, s.DEEPSEEK_MODEL, {}
if provider == "openrouter":
if not s.OPENROUTER_API_KEY:
raise RuntimeError("OPENROUTER_API_KEY not set")
return (
OPENROUTER_URL,
s.OPENROUTER_API_KEY,
s.OPENROUTER_MODEL,
{
# OpenRouter-specific attribution headers. Visible on the
# OpenRouter dashboard — keep aligned with the live brand.
"HTTP-Referer": branding.SITE_URL,
"X-Title": branding.BRAND_NAME,
# No-train opt-out. Tells OpenRouter (and any compatible
# upstream) that this request must not be used to train
# or improve models. The Privacy notice promises this; the
# header is what makes the promise truthful. If a future
# upstream ignores the header, fix the provider — not the
# header — so the contract stays auditable.
"X-OR-Allow-Training": "false",
},
)
raise RuntimeError(f"Unknown LLM provider: {provider!r}")
def llm_configured() -> bool:
"""At least one provider in the configured chain has an API key."""
return bool(_provider_chain())
def active_model() -> str:
"""Return the model name of the *first* provider in the configured
chain (the one that would be tried first). Used to label AICall ledger
rows when no actual call result is available yet."""
chain = _provider_chain()
if not chain:
return "unknown"
s = get_settings()
return s.DEEPSEEK_MODEL if chain[0] == "deepseek" else s.OPENROUTER_MODEL
@retry(
reraise=True,
stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=2, min=2, max=30),
retry=retry_if_exception_type((httpx.HTTPStatusError, httpx.TransportError)),
)
async def _call_provider(
client: httpx.AsyncClient,
provider: str,
messages: list[dict],
model: str | None,
max_tokens: int,
) -> LogResult:
"""One provider call with tenacity retries on transport/HTTP errors.
Lives inside the retry decorator so retries happen within a provider,
not across the fallback chain."""
url, api_key, default_model, extra_headers = _endpoint_for(provider)
used_model = model or default_model
headers = {
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json",
**extra_headers,
}
r = await client.post(
url,
headers=headers,
json={"model": used_model, "messages": messages, "max_tokens": max_tokens},
timeout=180,
)
r.raise_for_status()
data = r.json()
msg = data["choices"][0]["message"]
# Some providers return null content + populated `reasoning` for thinking
# models, or null content when finish_reason=length cut off the response.
content = msg.get("content") or msg.get("reasoning")
if not content:
finish = data["choices"][0].get("finish_reason")
raise RuntimeError(
f"LLM returned empty content (finish_reason={finish}, "
f"provider={provider}, model={used_model}, max_tokens={max_tokens})"
)
usage = data.get("usage") or {}
return LogResult(
content=content,
# Record provider+model so admin can see which path produced this row.
model=f"{provider}/{used_model}",
prompt_tokens=usage.get("prompt_tokens"),
completion_tokens=usage.get("completion_tokens"),
cost_usd=usage.get("cost") or usage.get("total_cost"),
)
async def call_llm(
client: httpx.AsyncClient,
messages: list[dict],
model: str | None = None,
max_tokens: int = 4000,
) -> LogResult:
"""Provider-aware chat completion with fallback. Tries primary
(LLM_PROVIDER) first; if it raises after retries, falls through to
LLM_FALLBACK. Raises only if every provider in the chain fails.
The returned LogResult.model is prefixed with the provider that
actually answered (e.g. ``deepseek/deepseek-v4-flash`` or
``openrouter/deepseek/deepseek-v4-flash``) — useful admin metadata
even though we hide it from the user-facing UI."""
chain = _provider_chain()
if not chain:
raise RuntimeError("No LLM provider configured (no API key set)")
last_exc: Exception | None = None
for i, provider in enumerate(chain):
try:
result = await _call_provider(
client, provider, messages, model, max_tokens,
)
if i > 0:
from app.logging import get_logger
get_logger("llm").info(
"llm.fallback_succeeded", provider=provider, attempt=i + 1,
)
return result
except Exception as e:
last_exc = e
if i + 1 < len(chain):
from app.logging import get_logger
get_logger("llm").warning(
"llm.primary_failed_trying_fallback",
provider=provider, error=str(e)[:200],
)
continue
# Re-raise the last exception so callers see the failure mode.
assert last_exc is not None
raise last_exc
def month_window() -> tuple[datetime, datetime]:
"""[start, now] in UTC for the current calendar month."""
now = datetime.now(timezone.utc)
start = now.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
return start, now
def month_start() -> datetime:
return month_window()[0]