llm: estimate cost from tokens when provider omits it

DeepSeek's native API returns prompt_tokens/completion_tokens but not
`usage.cost`. OpenRouter returns both. Result: with DeepSeek-direct as
primary (current default), every LogResult.cost_usd was None — and
every downstream cost ledger row (AICall, StrategicLog,
IndicatorSummary, translation tables) stored None instead of the real
spend.

Added a per-model rate table and fallback computation in _call_provider:
when the upstream omits cost, multiply tokens by the table rates. If the
upstream DOES return cost, keep it (authoritative). Falls back to None
if both the upstream and the table miss.

deepseek-v4-flash rates: $0.07/M input, $0.28/M output (per DeepSeek).
This commit is contained in:
Giorgio Gilestro 2026-05-28 12:36:55 +02:00
parent 355593c4f7
commit 7348055d72

View file

@ -20,6 +20,31 @@ from app.config import get_settings
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions" OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
# Per-model USD rates: (input_per_million, output_per_million).
# OpenRouter returns `usage.cost` directly; DeepSeek's native API does not.
# Used as a fallback when the upstream omits the cost field.
#
# Keys are matched exactly against the model identifier passed to
# `_estimate_cost_usd` (plain dict lookup, no prefix stripping or case
# folding), so every spelling of a model id that can reach the estimator
# needs its own entry.
# NOTE(review): rates are hard-coded snapshots; confirm them against the
# provider's current price list whenever a model is added or repriced.
_MODEL_PRICING_USD_PER_MILLION: dict[str, tuple[float, float]] = {
    # Same rates under two spellings of the id — presumably the bare native
    # name and a provider-prefixed form; verify against the callers.
    "deepseek-v4-flash": (0.07, 0.28),
    "deepseek/deepseek-v4-flash": (0.07, 0.28),
    "deepseek-chat": (0.27, 1.10),
    "deepseek-reasoner": (0.55, 2.19),
}
def _estimate_cost_usd(model: str, prompt_tokens, completion_tokens) -> float | None:
    """Estimate the USD cost of a call from its token counts.

    Fallback for responses where the upstream did not report a cost: the
    model is looked up in ``_MODEL_PRICING_USD_PER_MILLION`` and the prompt
    and completion tokens are priced at the per-million rates found there.

    Args:
        model: Model identifier; must match a pricing-table key exactly.
        prompt_tokens: Input token count, or ``None`` if unknown.
        completion_tokens: Output token count, or ``None`` if unknown.

    Returns:
        The estimated cost in USD, or ``None`` when the model has no entry
        in the pricing table or either token count is ``None`` — the caller
        then keeps whatever value the upstream did (or didn't) return.
    """
    pricing = _MODEL_PRICING_USD_PER_MILLION.get(model)
    if pricing is None:
        return None
    if prompt_tokens is None or completion_tokens is None:
        return None

    input_rate, output_rate = pricing
    # Rates are quoted per million tokens, so scale the weighted sum down.
    weighted_tokens = prompt_tokens * input_rate + completion_tokens * output_rate
    return weighted_tokens / 1_000_000.0
@dataclass @dataclass
class LogResult: class LogResult:
content: str content: str
@ -141,13 +166,21 @@ async def _call_provider(
f"provider={provider}, model={used_model}, max_tokens={max_tokens})" f"provider={provider}, model={used_model}, max_tokens={max_tokens})"
) )
usage = data.get("usage") or {} usage = data.get("usage") or {}
prompt_tokens = usage.get("prompt_tokens")
completion_tokens = usage.get("completion_tokens")
# OpenRouter populates `usage.cost`; DeepSeek's native API doesn't —
# estimate from tokens × per-model rates so the cost ledger stays
# populated regardless of which provider answered.
cost_usd = usage.get("cost") or usage.get("total_cost")
if cost_usd is None:
cost_usd = _estimate_cost_usd(used_model, prompt_tokens, completion_tokens)
return LogResult( return LogResult(
content=content, content=content,
# Record provider+model so admin can see which path produced this row. # Record provider+model so admin can see which path produced this row.
model=f"{provider}/{used_model}", model=f"{provider}/{used_model}",
prompt_tokens=usage.get("prompt_tokens"), prompt_tokens=prompt_tokens,
completion_tokens=usage.get("completion_tokens"), completion_tokens=completion_tokens,
cost_usd=usage.get("cost") or usage.get("total_cost"), cost_usd=cost_usd,
) )