From 7348055d7298c4090053679dcd97d43ae4f83644 Mon Sep 17 00:00:00 2001
From: Giorgio Gilestro <giorgio@gilest.ro>
Date: Thu, 28 May 2026 12:36:55 +0200
Subject: [PATCH] llm: estimate cost from tokens when provider omits it
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

DeepSeek's native API returns prompt_tokens/completion_tokens but not
`usage.cost`. OpenRouter returns both. Result: with DeepSeek-direct as
primary (current default), every LogResult.cost_usd was None — and
every downstream cost ledger row (AICall, StrategicLog,
IndicatorSummary, translation tables) stored None instead of the real
spend.

Added a per-model rate table and fallback computation in _call_provider:
when the upstream omits cost, multiply the token counts by the table
rates. If the upstream DOES return a non-zero cost, keep it
(authoritative). cost_usd stays None only when the upstream omits cost
and no estimate is possible (model not in the table, or a token count
is missing).

deepseek-v4-flash rates: $0.07/M input, $0.28/M output (per DeepSeek).
---
 app/services/openrouter.py | 39 +++++++++++++++++++++++++++++++++++---
 1 file changed, 36 insertions(+), 3 deletions(-)

diff --git a/app/services/openrouter.py b/app/services/openrouter.py
index c1ddb4f..ca31f2f 100644
--- a/app/services/openrouter.py
+++ b/app/services/openrouter.py
@@ -20,6 +20,31 @@ from app.config import get_settings
 OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
 
 
+# Per-model USD rates: (input_per_million, output_per_million).
+# OpenRouter returns `usage.cost` directly; DeepSeek's native API does not.
+# Used as a fallback when the upstream omits the cost field.
+_MODEL_PRICING_USD_PER_MILLION: dict[str, tuple[float, float]] = {
+    "deepseek-v4-flash":          (0.07, 0.28),
+    "deepseek/deepseek-v4-flash": (0.07, 0.28),
+    "deepseek-chat":              (0.27, 1.10),
+    "deepseek-reasoner":          (0.55, 2.19),
+}
+
+
+def _estimate_cost_usd(model: str, prompt_tokens, completion_tokens) -> float | None:
+    """Compute cost from token counts when the upstream didn't return one.
+
+    Returns None if either token count is missing or the model isn't in
+    the pricing table — caller falls back to whatever value the upstream
+    did (or didn't) return.
+    """
+    rates = _MODEL_PRICING_USD_PER_MILLION.get(model)
+    if rates is None or prompt_tokens is None or completion_tokens is None:
+        return None
+    in_rate, out_rate = rates
+    return (prompt_tokens * in_rate + completion_tokens * out_rate) / 1_000_000.0
+
+
 @dataclass
 class LogResult:
     content: str
@@ -141,13 +166,21 @@ async def _call_provider(
             f"provider={provider}, model={used_model}, max_tokens={max_tokens})"
         )
     usage = data.get("usage") or {}
+    prompt_tokens = usage.get("prompt_tokens")
+    completion_tokens = usage.get("completion_tokens")
+    # OpenRouter populates `usage.cost`; DeepSeek's native API doesn't —
+    # estimate from tokens × per-model rates so the cost ledger stays
+    # populated regardless of which provider answered.
+    cost_usd = usage.get("cost") or usage.get("total_cost")
+    if cost_usd is None:
+        cost_usd = _estimate_cost_usd(used_model, prompt_tokens, completion_tokens)
     return LogResult(
         content=content,
         # Record provider+model so admin can see which path produced this row.
         model=f"{provider}/{used_model}",
-        prompt_tokens=usage.get("prompt_tokens"),
-        completion_tokens=usage.get("completion_tokens"),
-        cost_usd=usage.get("cost") or usage.get("total_cost"),
+        prompt_tokens=prompt_tokens,
+        completion_tokens=completion_tokens,
+        cost_usd=cost_usd,
     )