# read.markets/app/services/openrouter.py

"""Strategic-log generator — DB-fed, OpenRouter-backed.

Ported from /home/gg/ownCloud/Family/Finances/Wealth/strategic_log.py. The
system prompt is preserved verbatim (the voice we converged on). The user
prompt is now built from DB rows, not from subprocess JSON dumps.
"""
from __future__ import annotations

import json
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone

import httpx
from tenacity import (
    retry,
    retry_if_exception,
    retry_if_exception_type,
    stop_after_attempt,
    wait_exponential,
)

from app import branding
from app.config import get_settings


# OpenRouter chat-completions endpoint, used by _endpoint_for() for the
# "openrouter" provider.
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
# Bump when the composed prompt changes meaningfully. Stored on every
# StrategicLog row so historical logs can be linked to the prompt that produced
# them.
#
# v6 (2026-05-17): TONE shrinks to NOVICE | INTERMEDIATE (PRO dropped). New
# educational stance baked into _CORE — explicit anti-TA, anti-gambling-mindset
# framing aimed at young investors entering the trading world. NOVICE retuned
# to be pedagogical (defining terms, anti-pattern teach-backs); INTERMEDIATE
# kept terse but with light-touch educational nudges. See tasks/todo.md.
# v7 (2026-05-18): Forbid "(Updated HH:MM UTC)" clauses in the date header —
# the model was hallucinating future times. The user prompt now carries the
# actual current UTC time so the model has accurate temporal context.
# v8: no changelog entry was recorded for this bump — TODO(review): describe
# what changed in the prompt so rows stamped v8 can be interpreted later.
PROMPT_VERSION = 8


# --- Core: invariant across tone/analysis settings ----------------------------
#
# First section of every daily-log system prompt: build_system_prompt() joins
# it with the selected _TONE and _ANALYSIS blocks. A trailing backslash inside
# the literal is a line continuation, so a wrapped source line reaches the
# model as one unbroken line; lines without one keep their newline.
# Meaningful edits to this text are what PROMPT_VERSION is meant to track.

_CORE = """You are Cassandra, writing a single daily strategic markets log \
for one specific investor. Synthesis, not exposition.

# Lens
- Geopolitics → markets is the primary causal chain. For each sector move, \
ask: geopolitical, cyclical, or idiosyncratic. Label it.
- Divergences and contradictions are where the information is. Hunt for them.
- Absence of expected moves is signal. If the thesis predicted a reaction \
that didn't happen, that's more interesting than the reactions that did.
- Compare live readings against any reference snapshots provided.

# Multi-source news
- When state-aligned outlets (Xinhua, China Daily, RT) and Western outlets \
cover the same event, read the gap in framing — that's the data.
- News matters only insofar as it changes a market read. Color without \
implications is filler.

# Structure
- One-line date header containing ONLY the date (e.g. `2026-05-18`) and \
optional anchor framing on the same line (e.g. "Week 11 since Hormuz"). \
**Never include a time-of-day clause like "(Updated 21:30 UTC)"** — \
generation time is recorded as metadata elsewhere. Inventing a future or \
arbitrary time in the header confuses readers.
- Immediately after the date header — with **nothing** in between — write a \
TL;DR. Format it as:

      ## TL;DR

      One concise paragraph of 2-3 sentences, **≤60 words total**, naming the \
single most important read or divergence of the day with concrete numbers. \
This is what a reader who only has 10 seconds sees. Don't waste it on the \
weather or generic context.

- Then 4-6 paragraphs, each anchored on a sleeve, sector, or theme. Concrete \
numbers in every paragraph. No section over ~150 words.
- One paragraph synthesising the news flow into a market read.
- End with a watch list: 3-5 specific items to track in the next week, \
each one sentence.

# Time-horizon discipline
- This is a STRATEGIC log, not a day-trader's read. Treat 1-day moves under \
2% as background noise; mention them only when they break or confirm a \
multi-week trend or are extreme outliers.
- Anchor every claim to multi-week (1m), multi-month (since-anchor), or \
multi-year (1y) changes — not 1d. If the only thing happening is a 1d move, \
omit the paragraph.
- The watch list is for "structural tripwires over the next 1-3 months", not \
"things to watch tomorrow". Each watch item should name a level/threshold \
whose breach would change the regime, not a calendar-date event.

# Rational vs irrational framing (MANDATORY in every paragraph)
The reader's primary goal is to disconnect rational decisions from market \
irrationality. This is the single most important lens of the log — it MUST \
appear in every sector or theme paragraph, not just where it feels natural. \
For each paragraph, before writing it, ask yourself the two questions and \
then make both answers visible in the prose:
- The RATIONAL drivers — what the underlying factors justify: earnings, \
real-economy data, monetary policy, structural geopolitical shifts, \
valuation vs fundamentals.
- The IRRATIONAL drivers — what the crowd is doing regardless of fundamentals: \
positioning, narrative momentum, sentiment extremes, concentration, \
flow-driven moves, options gamma, credit complacency.
Then state the GAP: is price moving with the rational read, ahead of it, \
or against it? If they agree, say so briefly and move on. If they diverge \
— price moving on irrational drivers while fundamentals say otherwise, or \
vice versa — name the divergence explicitly. Those gaps are where the next \
regime change starts and are the whole point of this log.
A paragraph that names only price action or only fundamentals, without \
both lenses, is incomplete and must be rewritten.

# Discipline
- No emojis, no marketing language, no "concerning" or "unprecedented" \
without a specific number behind it.
- Concrete > vague. "AMD +113% since the anchor" beats "AI stocks up sharply".
- Distinguish "the thesis predicted X and X happened" from "the thesis \
predicted X and X did not happen". Both are useful; conflating them is not.
- Don't repeat the same point in different words across paragraphs.
- No buy/sell recommendations. Triggers are pre-set elsewhere; your job is \
to report whether reality is confirming, modifying, or refuting the thesis.

# Stance (educational, anti-TA, anti-gambling)
The target reader is most likely young, new to investing, and at risk of \
treating markets like a horse race they need to "read" via chart patterns. \
Cassandra is the corrective.
- **No technical analysis.** Head-and-shoulders, RSI thresholds, Fibonacci \
levels, Elliott waves, "support/resistance" — these are descriptions of past \
crowd behaviour, not predictions. Don't use them; don't legitimise them. If \
you mention a price level, frame it as a positioning fact (e.g. "the level \
where the latest tranche of buyers entered"), not a signal.
- **No gambling framing.** Markets are not a coin flip and not a horse race. \
Never present a position as a single decisive moment, a "now or never", or a \
bet to be won. Every read should follow the shape: *regime → implication → \
what would change the regime*.
- **Macro causality, every time.** Price moves get explained through \
fundamentals, geopolitics, monetary policy, and structural shifts — not \
chart shapes. Even short paragraphs need the cause, not just the effect.

# System temperature (closing line, mandatory)
Close the log with a single sentence on a line of its own, formatted exactly:

    System temperature: [cool|neutral|elevated|hot|extreme] — [one clause naming the 2-3 specific divergences or readings that justify the label]

This is the line a reader who only sees the watch list scrolls down to. Make \
it earn its place: cite real signals (HY OAS, breadth, VIX, valuation, real \
yields), not vibes.

# Update mode (when an earlier log from today is provided)
If the user message includes a section labelled "Earlier log from today \
(generated HH:MM UTC)", treat that as YOUR OWN earlier draft. You are \
UPDATING it for the current data, not starting from scratch.
- Don't restate context that hasn't changed. Anchor on what's moved SINCE \
that timestamp: confirmations, refutations, new emergent patterns.
- The TL;DR should lead with the move since the earlier read when there \
was a meaningful intra-day change ("Since this morning's read, …") — \
otherwise stay regime-level.
- The watch list should evolve: drop items that triggered or settled, add \
items that emerged. Keep items still load-bearing.
- Preserve any insights from the earlier draft that remain valid; sharpen \
or revise the ones that don't. Avoid contradicting yourself silently — if \
you change a stance, name it briefly ("Earlier I read X; with Y now, the \
read shifts to Z")."""


# --- Tone: audience-shaping block --------------------------------------------
#
# Keys are the canonical tone names that _resolve_tone() returns; each value is
# the audience section the prompt builders insert into the system prompt.
# Every builder indexes this dict with the resolved tone, so each key that
# _resolve_tone() can return (including its "INTERMEDIATE" default) must exist.

_TONE: dict[str, str] = {
    "NOVICE": """# Audience: novice — likely a young investor new to markets
This reader probably arrived from social media, treats charts as predictions, \
and is one bad week away from quitting. Your job is to **educate them out of \
the gambling mindset** without ever being preachy. Calm, patient, slightly \
teacherly. Never condescending.

- **Define jargon the first time it appears.** A short clause in parentheses \
is fine: "yield curve (the chart of borrowing costs across different \
maturities)", "ERP (equity risk premium — the extra return investors demand \
for owning stocks instead of safe bonds)", "basis point (one hundredth of a \
percent — 25bp = 0.25%)".
- **Avoid ticker shorthand without context.** Use "Apple (AAPL)" on first \
mention, then "Apple" or the ticker after.
- **Everyday phrasing over jargon** where the meaning survives: "the price \
of US government debt fell, pushing yields up" rather than "the long end \
backed up"; "investors are paying more for the same earnings" rather than \
"multiple expansion".
- **One analogy per concept, used sparingly.** Use them to bridge to \
something concrete the reader already understands — not to entertain.

# Educational teach-backs (NOVICE-specific, when warranted)
When the day's data makes a common misconception concrete, drop in ONE \
teach-back of one to two sentences. Don't force it. Don't moralise. Examples \
of moments to do this:

- Anyone treating chart patterns as predictions: \
"Patterns like head-and-shoulders describe what crowds did, not what they \
will do — they're stories told after the fact, not edges."
- Anyone fixated on day-to-day moves: \
"A 1% one-day move in a stock is roughly what you'd expect by chance. The \
multi-week trend is where the information lives."
- Anyone treating one ticker as a coin flip: \
"A single name's monthly move is mostly noise. The regime — what bonds, the \
dollar, and credit are doing together — tells you whether ANY stock is \
likely to drift up or down."
- Anyone trying to "time the bottom" or "buy the dip": \
"Catching the bottom is a different game from owning the next cycle. The \
first needs you to be right within days; the second needs you to be roughly \
right within years."

Limit yourself to one teach-back per log. Skip them entirely if the day's \
data doesn't naturally invite one.

# Length
Target ~700 words. Slightly more than INTERMEDIATE because explanations \
need breathing room.""",

    "INTERMEDIATE": """# Audience: intermediate — reads the news, learning to \
connect macro to markets
Assume the reader knows market basics (yield curves, breakevens, HY OAS, \
sector ETFs, the difference between cyclical and defensive, what a basis \
point is). Use common terms without defining them, but stay clear of deep \
institutional shorthand ("the belly", "duration trade", "carry pickup", \
"the RV book", "off-the-run").

Light-touch educational nudges are welcome when the day's data warrants — \
e.g. "with rates this volatile, technical levels in equities are mostly \
distraction" — but keep them to a passing clause, not a paragraph. Don't \
moralise.

# Length
Target ~600 words. Lean and clear, no padding.""",
}


# Legacy tone values mapped to the closest current key of _TONE.
# NOTE(review): this comment used to promise a logged warning so stale caller
# configs would be noticed, but _resolve_tone() applies the mapping silently —
# no warning is emitted. Either add the log call there or keep this note.
_TONE_ALIASES = {
    "PRO": "INTERMEDIATE",
    "PROFESSIONAL": "INTERMEDIATE",
}


def _resolve_tone(tone: str) -> str:
    """Normalise a caller-supplied tone into a key of ``_TONE``.

    The value is upper-cased and stripped (a falsy value counts as empty).
    Canonical names pass through unchanged, legacy names listed in
    ``_TONE_ALIASES`` (e.g. PRO) are translated, and anything else —
    including the empty string — resolves to ``"INTERMEDIATE"``.
    """
    key = (tone or "").upper().strip()
    if key not in _TONE:
        # Not a current tone: try the legacy table, else use the default.
        key = _TONE_ALIASES.get(key, "INTERMEDIATE")
    return key


# --- Analysis: forward-vs-backward focus -------------------------------------
#
# Keys are upper-case style names. The prompt builders look up the upper-cased
# `analysis` argument here and fall back to the SPECULATIVE entry for any
# value that is not a key.

_ANALYSIS: dict[str, str] = {
    "DRY": """# Analysis style: dry
Report what happened. Identify divergences and contradictions. Compare to \
references. Do not speculate on what comes next. Forward-looking statements \
are limited to "what would invalidate the read" — never "we expect X to \
happen". The watch list contains items to monitor, not predictions.""",

    "SPECULATIVE": """# Analysis style: speculative
Report what happened, then explicitly explore forward scenarios. For each \
significant sector or theme, sketch a 1-4 week scenario set: the base case \
(what the data suggests), a contrarian case (what would invalidate it), and \
what tape signal would tip you from one to the other. Be explicit about \
uncertainty — say "the base case is" not "X will happen". The watch list is \
the trip-wires that decide between scenarios.""",
}


def build_system_prompt(tone: str, analysis: str) -> str:
    """Compose the daily-log system prompt for an audience and analysis style.

    Args:
        tone: Audience name; resolved through ``_resolve_tone`` (unknown or
            empty values become INTERMEDIATE).
        analysis: Analysis style, matched case-insensitively against the keys
            of ``_ANALYSIS``. ``None``, empty, whitespace-padded or unknown
            values fall back to SPECULATIVE instead of raising.

    Returns:
        ``_CORE``, the tone block and the analysis block, separated by blank
        lines.
    """
    tone_block = _TONE[_resolve_tone(tone)]
    # Normalise the same way the tone is normalised: a missing or padded
    # config value should select the default, not raise AttributeError.
    analysis_key = (analysis or "").strip().upper()
    analysis_block = _ANALYSIS.get(analysis_key, _ANALYSIS["SPECULATIVE"])
    return "\n\n".join([_CORE, tone_block, analysis_block])


# Backwards-compat: a default-composed SYSTEM_PROMPT for tests / callers that
# don't yet pass tone/analysis. New callers should call build_system_prompt().
# Built once at import time from the INTERMEDIATE tone and SPECULATIVE style.
SYSTEM_PROMPT = build_system_prompt("INTERMEDIATE", "SPECULATIVE")


# --- Chat-mode overrides (sidebar on /log) -----------------------------------
#
# build_chat_system_prompt() places this block right after the composed
# daily-log prompt and before the inlined reference material, so these rules
# replace the log-structure instructions for chat answers.

_CHAT_OVERRIDES = """# Chat mode (overrides the log-structure rules above)
You are NOT writing a daily log right now. The user is asking a specific
question via the chat sidebar.
- Forget the date header, TL;DR, sectional structure, and watch list. Just answer.
- Typical response: 200-400 words. Longer only if the question genuinely
  warrants it.
- Cite specific numbers and named headlines from the reference materials
  below whenever relevant. If a number isn't in the context, don't invent it.
- If a question is outside the provided context (e.g. asking about a stock or
  event not in the data), say so plainly rather than speculating from prior
  knowledge.
- No buy/sell recommendations. If asked, redirect to thesis and scenarios.
- Keep the same audience and analysis discipline established above."""


def build_summary_system_prompt(tone: str, analysis: str) -> str:
    """Build the system prompt for the per-indicator-group hourly summary.

    A lean, focused prompt: INTERPRETATION not description — the reader has
    the table next to this paragraph; they don't need numbers recited at them.

    Args:
        tone: Audience name; resolved through ``_resolve_tone``.
        analysis: Analysis style, matched case-insensitively against the keys
            of ``_ANALYSIS``. ``None``, empty, whitespace-padded or unknown
            values fall back to SPECULATIVE instead of raising.

    Returns:
        The summary instructions followed by the tone and analysis blocks.
    """
    tone_block = _TONE[_resolve_tone(tone)]
    # Normalise like the tone: a missing or padded value selects the default
    # rather than raising AttributeError.
    analysis_block = _ANALYSIS.get(
        (analysis or "").strip().upper(), _ANALYSIS["SPECULATIVE"]
    )
    return f"""You write a TINY interpretation (≤60 words, 2-3 sentences) \
of ONE indicator group for a strategic markets dashboard.

# What this is for
The reader is looking at the table of numbers right next to your text. \
They can see the values. They CANNOT see the meaning. Your job is to \
**explain what the data means**, not to recite it. Each sentence should be \
a regime-level interpretation, a fundamental driver identification, or a \
cross-indicator implication — not a description of moves.

# Rational vs irrational lens (required at this length too)
Even at 2-3 sentences, contrast what the underlying factors justify \
(rational: fundamentals, policy, valuation) with what the crowd is doing \
(irrational: positioning, narrative, flows) whenever the two diverge. If \
they don't diverge, say so in one clause. Never just describe the move \
without placing it on this axis.

# Hard constraints
- Plain prose, ONE paragraph. No markdown, no headers, no lists, no labels.
- Open IMMEDIATELY with substance. NEVER start with: "I need to", "I'll", \
"We need to", "We are asked", "Here's", "Let me", "Let's", "Sure", "Looking \
at", "Based on", "Summary:", "The data shows", "First", "To address". No \
meta-commentary at all.
- Cite at most 2-3 specific numbers and ONLY when they anchor an \
interpretation. Don't list moves; explain them.
- Multi-week / multi-month horizon. 1-day moves under 2% are noise — skip.
- No buy/sell language. No predictions. No watch list. No TL;DR. No date \
header. No "system temperature" line — that belongs to the full daily log.
- Output the read directly. Do NOT include phrases like "Example", "Good \
example", "Bad example", "Reference", or any meta-framing of your output.

{tone_block}

{analysis_block}
"""


def build_summary_user_prompt(group_name: str, quotes: list[dict]) -> str:
    """Build the user message for one indicator group's short summary.

    The quotes are serialised as indented JSON (values json cannot encode are
    stringified) and cut to the first 12000 characters before being placed in
    a fenced block. The cut is a plain character slice, so a very large
    payload can end mid-token.
    """
    serialized = json.dumps(quotes, indent=2, default=str)
    header = f"# Group: {group_name}"
    sections = (
        header,
        "Indicators (latest reading + 1d/1m/1y/since-anchor change):",
        "```json",
        serialized[:12000],
        "```",
        "\nWrite the 2-3 sentence read for this group now.",
    )
    return "\n".join(sections)


def build_aggregate_summary_system_prompt(tone: str, analysis: str) -> str:
    """Build the system prompt for the cross-group aggregate dashboard read.

    Wider lens than a per-group summary — synthesise across all groups.

    Args:
        tone: Audience name; resolved through ``_resolve_tone``.
        analysis: Analysis style, matched case-insensitively against the keys
            of ``_ANALYSIS``. ``None``, empty, whitespace-padded or unknown
            values fall back to SPECULATIVE instead of raising.

    Returns:
        The aggregate instructions followed by the tone and analysis blocks.
    """
    tone_block = _TONE[_resolve_tone(tone)]
    # Normalise like the tone: a missing or padded value selects the default
    # rather than raising AttributeError.
    analysis_block = _ANALYSIS.get(
        (analysis or "").strip().upper(), _ANALYSIS["SPECULATIVE"]
    )
    return f"""You write a single SHORT cross-asset INTERPRETATION (≤80 \
words, 2-4 sentences) for the dashboard header. The reader is glancing — \
give them the meaning of the whole tape, not a recap.

# What this is for
The reader can see every indicator on the dashboard below this paragraph. \
Your job is NOT to summarise the moves. It is to explain what the moves, \
**taken together as a system**, mean: which regime is being signalled, \
which divergences are load-bearing, what fundamental story the cross-asset \
behaviour tells.

# Rational vs irrational lens (required at this length too)
The cross-asset tape's value is in the gap between what the underlying \
factors justify (rational: fundamentals, policy, valuation) and what the \
crowd is actually doing (irrational: positioning, narrative momentum, \
flows). At least one of the 2-4 sentences must name this gap or, if the \
two cohere, explicitly say so.

# Hard constraints
- Plain prose, ONE paragraph. No markdown, headers, lists, or labels.
- Open IMMEDIATELY with substance. NEVER start with: "I need to", "I'll", \
"We need to", "Here's", "Let me", "Looking at", "Based on", "Sure", "Summary:", \
"The data shows", "Across the board". No meta-commentary.
- Identify the single most important **cross-asset implication**: e.g. \
"rates and credit disagree", "equities outrun fundamentals", "geopolitical \
risk premium is in commodities but not vol". Cite no more than 3 specific \
numbers, and only as anchors for the interpretation.
- Multi-week / multi-month horizon. 1-day moves under 2% are noise.
- No buy/sell language. No predictions of specific levels.
- Output the read directly. Do NOT include phrases like "Example", "Good \
example", "Bad example", "Reference", or any meta-framing of your output.

{tone_block}

{analysis_block}
"""


def build_aggregate_summary_user_prompt(quotes_by_group: dict[str, list[dict]]) -> str:
    """Build the user message for the cross-group aggregate read.

    All groups are serialised together as indented JSON (values json cannot
    encode are stringified) and cut to the first 20000 characters; the cut is
    a plain character slice and may end mid-token for very large payloads.
    """
    serialized = json.dumps(quotes_by_group, indent=2, default=str)
    sections = (
        "# All indicator groups (latest readings + change windows)",
        "```json",
        serialized[:20000],
        "```",
        "\nWrite the cross-asset aggregate read now.",
    )
    return "\n".join(sections)


def build_chat_system_prompt(
    tone: str,
    analysis: str,
    *,
    log_content: str | None,
    log_generated_at: datetime | None,
    quotes_by_group: dict[str, list[dict]],
    headlines: list[dict],
    reference_line: str | None = None,
) -> str:
    """Compose the system prompt for the /log chat sidebar.

    Starts from the daily-log prompt for the chosen tone and analysis style,
    appends the chat-mode overrides, then inlines the reference material the
    model may cite: the optional reference snapshot, the latest log (when
    there is one), the live market data as JSON cut to 25000 characters, and
    at most the first 50 headlines. Each headline dict must provide
    ``source`` and ``title``.
    """
    sections: list[str] = [build_system_prompt(tone, analysis), "", _CHAT_OVERRIDES, ""]

    if reference_line:
        sections.append(f"# Doc reference snapshot\n{reference_line}\n")

    if log_content:
        # The timestamp is formatted as given and only labelled UTC; a log
        # without one is marked "n/a".
        if log_generated_at:
            ts = log_generated_at.strftime("%Y-%m-%d %H:%M UTC")
        else:
            ts = "n/a"
        sections.append(f"# Latest strategic log (generated {ts})\n\n{log_content}\n")

    market_json = json.dumps(quotes_by_group, indent=2, default=str)[:25000]
    sections += [
        "# Live market data",
        "```json\n" + market_json + "\n```",
        "# Recent headlines (last 24h, thesis-filtered top 50)",
    ]
    sections.extend(f"- [{h['source']}] {h['title']}" for h in headlines[:50])
    return "\n".join(sections)


@dataclass
class LogResult:
    """Outcome of one successful chat-completion call (see _call_provider)."""

    # Text of the first choice's message: its `content`, or its `reasoning`
    # when `content` is empty.
    content: str
    # "<provider>/<model>" label of the path that produced this result.
    model: str
    # Token counts copied from the response's `usage` object; None when the
    # provider did not report them.
    prompt_tokens: int | None
    completion_tokens: int | None
    # `usage.cost` or, failing that, `usage.total_cost`; None when neither is
    # reported. NOTE(review): USD is implied by the field name only — confirm
    # the unit per provider.
    cost_usd: float | None


def build_user_prompt(
    *,
    today: datetime,
    anchor: str | None,
    quotes_by_group: dict[str, list[dict]],
    headlines_by_bucket: dict[str, list[dict]],
    reference_line: str | None = None,
    previous_log: object | None = None,
) -> str:
    """Assemble the daily-log user message from already-fetched data.

    Sections, in order: request header with the current time, optional anchor
    date, optional reference snapshot, optional earlier log from today, live
    market data as JSON, the news flow grouped by bucket (empty buckets are
    skipped, at most 30 headlines each) and the task instruction.

    When ``previous_log`` is given it must expose ``content`` and may expose
    ``generated_at``; the prompt then asks the model to update that draft
    ("Update mode") instead of writing a fresh log. Each headline dict must
    provide ``when`` (a string), ``source`` and ``title``.
    """
    updating = previous_log is not None

    chunks = [
        f"# Strategic log request — {today.strftime('%Y-%m-%d')}",
        # Give the model the real clock so it does not invent one; the date
        # header it writes must still be date-only (per system prompt).
        # `today` is formatted as given and merely labelled UTC.
        f"Current time: {today.strftime('%Y-%m-%d %H:%M UTC')}",
    ]
    if anchor:
        chunks.append(f"Anchor reference date: {anchor}")
    if reference_line:
        chunks.append(
            "\n## Reference snapshot (when the macro thesis was authored)"
            f"\n{reference_line}\nCompare live readings against it."
        )

    if updating:
        generated_at = getattr(previous_log, "generated_at", None)
        if generated_at:
            ts = generated_at.strftime("%H:%M UTC")
        else:
            ts = "earlier today"
        chunks.append(
            f"\n## Earlier log from today (generated {ts})\n"
            "Treat this as YOUR OWN earlier draft for today. Update it for\n"
            "the current data — don't restate unchanged context. See the\n"
            "'Update mode' section of the system prompt for how to handle it.\n"
            "```markdown\n"
            f"{previous_log.content}\n"
            "```"
        )

    market_json = json.dumps(quotes_by_group, indent=2, default=str)
    chunks += [
        "\n## Live market data (per group)",
        "```json\n" + market_json + "\n```",
        "\n## News flow (last 24h, filtered by bucket)",
    ]
    for label, items in headlines_by_bucket.items():
        if items:
            chunks.append(f"\n### {label.upper()}")
            # `when` is trimmed to minute precision with the ISO "T" replaced.
            chunks.extend(
                f"- [{h['when'][:16].replace('T',' ')}] [{h['source']}] {h['title']}"
                for h in items[:30]
            )

    # NOTE(review): both task variants ask for ~800 words while the tone
    # blocks of the system prompt target ~600-700 — confirm which should win.
    if updating:
        task_line = (
            "\n## Task\nUpdate the earlier log above for the current data. "
            "Keep the same structure (date header, TL;DR, sections, watch "
            "list, system temperature) but anchor on what has CHANGED since "
            "the earlier draft's timestamp. ~800 words. No preamble."
        )
    else:
        task_line = (
            "\n## Task\nWrite the daily strategic log in ~800 words, following "
            "the discipline in the system prompt. No preamble; begin directly "
            "with the date header."
        )
    chunks.append(task_line)
    return "\n".join(chunks)


def _provider_chain() -> list[str]:
    """Return the providers to try, in order.

    The primary (``LLM_PROVIDER``, defaulting to "deepseek") comes first and
    the fallback (``LLM_FALLBACK``) second when it is set and differs from
    the primary. Any provider without a configured API key is then removed,
    so the result may be empty.
    """
    settings = get_settings()
    primary = (settings.LLM_PROVIDER or "deepseek").lower()
    fallback = (settings.LLM_FALLBACK or "").lower()

    if not fallback or fallback == primary:
        candidates = [primary]
    else:
        candidates = [primary, fallback]
    return [name for name in candidates if _provider_has_key(name)]


def _provider_has_key(provider: str) -> bool:
    """Tell whether ``provider`` is a known provider with an API key set.

    Unknown provider names are reported as not configured.
    """
    settings = get_settings()
    if provider == "deepseek":
        key = settings.DEEPSEEK_API_KEY
    elif provider == "openrouter":
        key = settings.OPENROUTER_API_KEY
    else:
        return False
    return bool(key)


def _endpoint_for(provider: str) -> tuple[str, str, str, dict[str, str]]:
    """Look up the connection details for one provider.

    Returns:
        ``(url, api_key, default_model, extra_headers)``.

    Raises:
        RuntimeError: if the provider's API key is not set, or the provider
            name is not recognised.
    """
    settings = get_settings()

    if provider == "deepseek":
        key = settings.DEEPSEEK_API_KEY
        if key:
            return settings.DEEPSEEK_URL, key, settings.DEEPSEEK_MODEL, {}
        raise RuntimeError("DEEPSEEK_API_KEY not set")

    if provider == "openrouter":
        key = settings.OPENROUTER_API_KEY
        if key:
            attribution_headers = {
                # OpenRouter-specific attribution headers, shown on the
                # OpenRouter dashboard — keep aligned with the live brand.
                "HTTP-Referer": branding.SITE_URL,
                "X-Title": branding.BRAND_NAME,
                # No-train opt-out: asks that the request not be used to
                # train or improve models. The Privacy notice promises this,
                # so if an upstream ignores the header, fix the provider —
                # not the header — to keep the contract auditable.
                # NOTE(review): confirm OpenRouter actually honours this
                # header; its documented data-policy control appears to be
                # the `provider.data_collection` request field.
                "X-OR-Allow-Training": "false",
            }
            return OPENROUTER_URL, key, settings.OPENROUTER_MODEL, attribution_headers
        raise RuntimeError("OPENROUTER_API_KEY not set")

    raise RuntimeError(f"Unknown LLM provider: {provider!r}")


def llm_configured() -> bool:
    """Report whether the configured chain has any provider with an API key."""
    return len(_provider_chain()) > 0


def active_model() -> str:
    """Name the model of the provider that would be tried first.

    Used to label AICall ledger rows before any call result exists. Returns
    ``"unknown"`` when no provider in the chain has an API key.
    """
    providers = _provider_chain()
    if providers:
        settings = get_settings()
        if providers[0] == "deepseek":
            return settings.DEEPSEEK_MODEL
        return settings.OPENROUTER_MODEL
    return "unknown"


# Client-error statuses that a later attempt can plausibly get past (request
# timeout, rate limiting). Every other 4xx — bad key, malformed request,
# unknown model — fails the same way each time, so retrying it only delays
# the fallback provider.
_RETRYABLE_CLIENT_STATUSES = frozenset({408, 429})


def _is_retryable_http_error(exc: BaseException) -> bool:
    """True for HTTP status errors worth retrying: 408, 429 and any 5xx."""
    if not isinstance(exc, httpx.HTTPStatusError):
        return False
    status = exc.response.status_code
    return status >= 500 or status in _RETRYABLE_CLIENT_STATUSES


@retry(
    reraise=True,
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=2, min=2, max=30),
    retry=(
        retry_if_exception_type(httpx.TransportError)
        | retry_if_exception(_is_retryable_http_error)
    ),
)
async def _call_provider(
    client: httpx.AsyncClient,
    provider: str,
    messages: list[dict],
    model: str | None,
    max_tokens: int,
) -> LogResult:
    """Make one chat-completion call against a single provider.

    Retries (up to 3 attempts, exponential wait) happen inside this function,
    so they stay within one provider and never cross the fallback chain. Only
    transport errors and retryable HTTP statuses (408, 429, 5xx) are retried.

    Args:
        client: Shared async HTTP client used for the POST.
        provider: Provider name understood by ``_endpoint_for``.
        messages: Chat messages sent as the request's ``messages`` field.
        model: Model override; the provider's default model when falsy.
        max_tokens: Completion token limit sent with the request.

    Returns:
        A ``LogResult`` whose ``model`` is ``"<provider>/<model>"``.

    Raises:
        RuntimeError: provider misconfigured (from ``_endpoint_for``), the
            response carries no choices, or the first choice has no text.
        httpx.HTTPStatusError: non-2xx response (after retries where they
            apply).
        httpx.TransportError: network failure after the last retry.
    """
    url, api_key, default_model, extra_headers = _endpoint_for(provider)
    used_model = model or default_model
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
        **extra_headers,
    }
    r = await client.post(
        url,
        headers=headers,
        json={"model": used_model, "messages": messages, "max_tokens": max_tokens},
        timeout=180,
    )
    r.raise_for_status()
    data = r.json()

    # A 2xx body without choices (for instance an error payload) would
    # otherwise surface as a bare KeyError/IndexError that says nothing about
    # which provider or model failed.
    choices = data.get("choices") if isinstance(data, dict) else None
    if not choices:
        error = data.get("error") if isinstance(data, dict) else None
        raise RuntimeError(
            f"LLM response has no choices (provider={provider}, "
            f"model={used_model}, error={str(error)[:200]})"
        )
    choice = choices[0]
    msg = choice.get("message") or {}

    # Some providers return null content + populated `reasoning` for thinking
    # models, or null content when finish_reason=length cut off the response.
    content = msg.get("content") or msg.get("reasoning")
    if not content:
        finish = choice.get("finish_reason")
        raise RuntimeError(
            f"LLM returned empty content (finish_reason={finish}, "
            f"provider={provider}, model={used_model}, max_tokens={max_tokens})"
        )

    usage = data.get("usage") or {}
    # Test for None explicitly: a reported cost of 0 is a real value and must
    # not be replaced by a missing `total_cost`.
    cost = usage.get("cost")
    if cost is None:
        cost = usage.get("total_cost")
    return LogResult(
        content=content,
        # Record provider+model so admin can see which path produced this row.
        model=f"{provider}/{used_model}",
        prompt_tokens=usage.get("prompt_tokens"),
        completion_tokens=usage.get("completion_tokens"),
        cost_usd=cost,
    )


async def call_llm(
    client: httpx.AsyncClient,
    messages: list[dict],
    model: str | None = None,
    max_tokens: int = 4000,
) -> LogResult:
    """Run a chat completion against the provider chain, with fallback.

    Providers from ``_provider_chain()`` are tried in order: the primary
    (LLM_PROVIDER) first, then LLM_FALLBACK once the primary has failed after
    its own retries. The first successful result is returned.

    The returned LogResult.model is prefixed with the provider that actually
    answered (e.g. ``deepseek/deepseek-v4-flash`` or
    ``openrouter/deepseek/deepseek-v4-flash``) — useful admin metadata even
    though we hide it from the user-facing UI.

    Raises:
        RuntimeError: no provider has an API key configured.
        Exception: when every provider fails, the last provider's exception
            is re-raised unchanged.
    """
    providers = _provider_chain()
    if not providers:
        raise RuntimeError("No LLM provider configured (no API key set)")

    final_index = len(providers) - 1
    failure: Exception | None = None
    for position, provider in enumerate(providers):
        try:
            outcome = await _call_provider(
                client, provider, messages, model, max_tokens,
            )
            if position > 0:
                # Success on a non-primary provider: worth a log line.
                from app.logging import get_logger
                get_logger("llm").info(
                    "llm.fallback_succeeded", provider=provider, attempt=position + 1,
                )
            return outcome
        except Exception as exc:
            failure = exc
            if position < final_index:
                # Another provider remains; note the failure and move on.
                from app.logging import get_logger
                get_logger("llm").warning(
                    "llm.primary_failed_trying_fallback",
                    provider=provider, error=str(exc)[:200],
                )

    # Every provider failed: surface the last failure mode to the caller.
    assert failure is not None
    raise failure


# Back-compat alias for any straggling import sites. It is the same function
# object as call_llm, so it follows the provider chain rather than calling
# OpenRouter specifically.
call_openrouter = call_llm


def month_window() -> tuple[datetime, datetime]:
    """Return ``(start, now)`` for the current calendar month, both in UTC.

    ``start`` is midnight on the first day of the month that ``now`` is in.
    """
    now = datetime.now(timezone.utc)
    first_of_month = datetime(now.year, now.month, 1, tzinfo=timezone.utc)
    return first_of_month, now


def month_start() -> datetime:
    """Return the UTC start of the current calendar month."""
    start, _now = month_window()
    return start