llm: estimate cost from tokens when provider omits it

DeepSeek's native API returns prompt_tokens/completion_tokens but not `usage.cost`; OpenRouter returns both. As a result, with DeepSeek-direct as the primary provider (the current default), every LogResult.cost_usd was None, and every downstream cost-ledger row (AICall, StrategicLog, IndicatorSummary, translation tables) stored None instead of the real spend. This commit adds a per-model rate table and a fallback computation in _call_provider: when the upstream omits the cost, it is estimated by multiplying the token counts by the table rates. If the upstream DOES return a cost, that value is kept (it is authoritative). The result is still None when the upstream omits the cost and the estimate is unavailable (the model is not in the table, or a token count is missing). deepseek-v4-flash rates: $0.07/M input tokens, $0.28/M output tokens (per DeepSeek).
2026-05-28 12:36:55 +02:00 · 2026-05-28 12:36:55 +02:00 · 7348055d72
commit 7348055d72
parent 355593c4f7
1 changed files with 36 additions and 3 deletions
--- a/app/services/openrouter.py
+++ b/app/services/openrouter.py
@ -20,6 +20,31 @@ from app.config import get_settings
 OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
 # Per-model USD rates: (input_per_million, output_per_million).
 # OpenRouter returns `usage.cost` directly; DeepSeek's native API does not.
 # Used as a fallback when the upstream omits the cost field.
 _MODEL_PRICING_USD_PER_MILLION: dict[str, tuple[float, float]] = {
    "deepseek-v4-flash":          (0.07, 0.28),
    "deepseek/deepseek-v4-flash": (0.07, 0.28),
    "deepseek-chat":              (0.27, 1.10),
    "deepseek-reasoner":          (0.55, 2.19),
 }
 def _estimate_cost_usd(model: str, prompt_tokens, completion_tokens) -> float | None:
    """Compute cost from token counts when the upstream didn't return one.
    Returns None if either token count is missing or the model isn't in
    the pricing table — caller falls back to whatever value the upstream
    did (or didn't) return.
    """
    rates = _MODEL_PRICING_USD_PER_MILLION.get(model)
    if rates is None or prompt_tokens is None or completion_tokens is None:
        return None
    in_rate, out_rate = rates
    return (prompt_tokens * in_rate + completion_tokens * out_rate) / 1_000_000.0
@dataclass
 class LogResult:
    content: str
@ -141,13 +166,21 @@ async def _call_provider(
            f"provider={provider}, model={used_model}, max_tokens={max_tokens})"
        )
    usage = data.get("usage") or {}
    prompt_tokens = usage.get("prompt_tokens")
    completion_tokens = usage.get("completion_tokens")
    # OpenRouter populates `usage.cost`; DeepSeek's native API doesn't —
    # estimate from tokens × per-model rates so the cost ledger stays
    # populated regardless of which provider answered.
    cost_usd = usage.get("cost") or usage.get("total_cost")
    if cost_usd is None:
        cost_usd = _estimate_cost_usd(used_model, prompt_tokens, completion_tokens)
    return LogResult(
        content=content,
        # Record provider+model so admin can see which path produced this row.
        model=f"{provider}/{used_model}",
-        prompt_tokens=usage.get("prompt_tokens"),
+        prompt_tokens=prompt_tokens,
-        completion_tokens=usage.get("completion_tokens"),
+        completion_tokens=completion_tokens,
-        cost_usd=usage.get("cost") or usage.get("total_cost"),
+        cost_usd=cost_usd,
    )