diff --git a/app/services/openrouter.py b/app/services/openrouter.py index c1ddb4f..ca31f2f 100644 --- a/app/services/openrouter.py +++ b/app/services/openrouter.py @@ -20,6 +20,31 @@ from app.config import get_settings OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions" +# Per-model USD rates: (input_per_million, output_per_million). +# OpenRouter returns `usage.cost` directly; DeepSeek's native API does not. +# Used as a fallback when the upstream omits the cost field. +_MODEL_PRICING_USD_PER_MILLION: dict[str, tuple[float, float]] = { + "deepseek-v4-flash": (0.07, 0.28), + "deepseek/deepseek-v4-flash": (0.07, 0.28), + "deepseek-chat": (0.27, 1.10), + "deepseek-reasoner": (0.55, 2.19), +} + + +def _estimate_cost_usd(model: str, prompt_tokens, completion_tokens) -> float | None: + """Compute cost from token counts when the upstream didn't return one. + + Returns None if either token count is missing or the model isn't in + the pricing table — caller falls back to whatever value the upstream + did (or didn't) return. + """ + rates = _MODEL_PRICING_USD_PER_MILLION.get(model) + if rates is None or prompt_tokens is None or completion_tokens is None: + return None + in_rate, out_rate = rates + return (prompt_tokens * in_rate + completion_tokens * out_rate) / 1_000_000.0 + + @dataclass class LogResult: content: str @@ -141,13 +166,21 @@ async def _call_provider( f"provider={provider}, model={used_model}, max_tokens={max_tokens})" ) usage = data.get("usage") or {} + prompt_tokens = usage.get("prompt_tokens") + completion_tokens = usage.get("completion_tokens") + # OpenRouter populates `usage.cost`; DeepSeek's native API doesn't — + # estimate from tokens × per-model rates so the cost ledger stays + # populated regardless of which provider answered. + cost_usd = usage.get("cost") or usage.get("total_cost") + if cost_usd is None: + cost_usd = _estimate_cost_usd(used_model, prompt_tokens, completion_tokens) return LogResult( content=content, # Record provider+model so admin can see which path produced this row. model=f"{provider}/{used_model}", - prompt_tokens=usage.get("prompt_tokens"), - completion_tokens=usage.get("completion_tokens"), - cost_usd=usage.get("cost") or usage.get("total_cost"), + prompt_tokens=prompt_tokens, + completion_tokens=completion_tokens, + cost_usd=cost_usd, )