From 7348055d7298c4090053679dcd97d43ae4f83644 Mon Sep 17 00:00:00 2001 From: Giorgio Gilestro Date: Thu, 28 May 2026 12:36:55 +0200 Subject: [PATCH] llm: estimate cost from tokens when provider omits it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DeepSeek's native API returns prompt_tokens/completion_tokens but not `usage.cost`. OpenRouter returns both. Result: with DeepSeek-direct as primary (current default), every LogResult.cost_usd was None — and every downstream cost ledger row (AICall, StrategicLog, IndicatorSummary, translation tables) stored None instead of the real spend. Added a per-model rate table and fallback computation in _call_provider: when the upstream omits cost, multiply tokens by the table rates. If the upstream DOES return cost, keep it (authoritative). Falls back to None if both the upstream and the table miss. deepseek-v4-flash rates: $0.07/M input, $0.28/M output (per DeepSeek). --- app/services/openrouter.py | 39 +++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) diff --git a/app/services/openrouter.py b/app/services/openrouter.py index c1ddb4f..ca31f2f 100644 --- a/app/services/openrouter.py +++ b/app/services/openrouter.py @@ -20,6 +20,31 @@ from app.config import get_settings OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions" +# Per-model USD rates: (input_per_million, output_per_million). +# OpenRouter returns `usage.cost` directly; DeepSeek's native API does not. +# Used as a fallback when the upstream omits the cost field. +_MODEL_PRICING_USD_PER_MILLION: dict[str, tuple[float, float]] = { + "deepseek-v4-flash": (0.07, 0.28), + "deepseek/deepseek-v4-flash": (0.07, 0.28), + "deepseek-chat": (0.27, 1.10), + "deepseek-reasoner": (0.55, 2.19), +} + + +def _estimate_cost_usd(model: str, prompt_tokens, completion_tokens) -> float | None: + """Compute cost from token counts when the upstream didn't return one. 
+ + Returns None if either token count is missing or the model isn't in + the pricing table — caller falls back to whatever value the upstream + did (or didn't) return. + """ + rates = _MODEL_PRICING_USD_PER_MILLION.get(model) + if rates is None or prompt_tokens is None or completion_tokens is None: + return None + in_rate, out_rate = rates + return (prompt_tokens * in_rate + completion_tokens * out_rate) / 1_000_000.0 + + @dataclass class LogResult: content: str @@ -141,13 +166,21 @@ async def _call_provider( f"provider={provider}, model={used_model}, max_tokens={max_tokens})" ) usage = data.get("usage") or {} + prompt_tokens = usage.get("prompt_tokens") + completion_tokens = usage.get("completion_tokens") + # OpenRouter populates `usage.cost`; DeepSeek's native API doesn't — + # estimate from tokens × per-model rates so the cost ledger stays + # populated regardless of which provider answered. + cost_usd = usage.get("cost") or usage.get("total_cost") + if cost_usd is None: + cost_usd = _estimate_cost_usd(used_model, prompt_tokens, completion_tokens) return LogResult( content=content, # Record provider+model so admin can see which path produced this row. model=f"{provider}/{used_model}", - prompt_tokens=usage.get("prompt_tokens"), - completion_tokens=usage.get("completion_tokens"), - cost_usd=usage.get("cost") or usage.get("total_cost"), + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + cost_usd=cost_usd, )