llm: estimate cost from tokens when provider omits it
DeepSeek's native API returns prompt_tokens/completion_tokens but not `usage.cost`. OpenRouter returns both. Result: with DeepSeek-direct as primary (current default), every LogResult.cost_usd was None — and every downstream cost ledger row (AICall, StrategicLog, IndicatorSummary, translation tables) stored None instead of the real spend. Added a per-model rate table and fallback computation in _call_provider: when the upstream omits cost, multiply tokens by the table rates. If the upstream DOES return cost, keep it (authoritative). Falls back to None if both the upstream and the table miss. deepseek-v4-flash rates: $0.07/M input, $0.28/M output (per DeepSeek).
This commit is contained in:
parent
355593c4f7
commit
7348055d72
1 changed files with 36 additions and 3 deletions
|
|
@ -20,6 +20,31 @@ from app.config import get_settings
|
||||||
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
|
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
|
||||||
|
|
||||||
|
|
||||||
|
# Per-model USD rates: (input_per_million, output_per_million).
|
||||||
|
# OpenRouter returns `usage.cost` directly; DeepSeek's native API does not.
|
||||||
|
# Used as a fallback when the upstream omits the cost field.
|
||||||
|
_MODEL_PRICING_USD_PER_MILLION: dict[str, tuple[float, float]] = {
|
||||||
|
"deepseek-v4-flash": (0.07, 0.28),
|
||||||
|
"deepseek/deepseek-v4-flash": (0.07, 0.28),
|
||||||
|
"deepseek-chat": (0.27, 1.10),
|
||||||
|
"deepseek-reasoner": (0.55, 2.19),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _estimate_cost_usd(model: str, prompt_tokens, completion_tokens) -> float | None:
|
||||||
|
"""Compute cost from token counts when the upstream didn't return one.
|
||||||
|
|
||||||
|
Returns None if either token count is missing or the model isn't in
|
||||||
|
the pricing table — caller falls back to whatever value the upstream
|
||||||
|
did (or didn't) return.
|
||||||
|
"""
|
||||||
|
rates = _MODEL_PRICING_USD_PER_MILLION.get(model)
|
||||||
|
if rates is None or prompt_tokens is None or completion_tokens is None:
|
||||||
|
return None
|
||||||
|
in_rate, out_rate = rates
|
||||||
|
return (prompt_tokens * in_rate + completion_tokens * out_rate) / 1_000_000.0
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class LogResult:
|
class LogResult:
|
||||||
content: str
|
content: str
|
||||||
|
|
@ -141,13 +166,21 @@ async def _call_provider(
|
||||||
f"provider={provider}, model={used_model}, max_tokens={max_tokens})"
|
f"provider={provider}, model={used_model}, max_tokens={max_tokens})"
|
||||||
)
|
)
|
||||||
usage = data.get("usage") or {}
|
usage = data.get("usage") or {}
|
||||||
|
prompt_tokens = usage.get("prompt_tokens")
|
||||||
|
completion_tokens = usage.get("completion_tokens")
|
||||||
|
# OpenRouter populates `usage.cost`; DeepSeek's native API doesn't —
|
||||||
|
# estimate from tokens × per-model rates so the cost ledger stays
|
||||||
|
# populated regardless of which provider answered.
|
||||||
|
cost_usd = usage.get("cost") or usage.get("total_cost")
|
||||||
|
if cost_usd is None:
|
||||||
|
cost_usd = _estimate_cost_usd(used_model, prompt_tokens, completion_tokens)
|
||||||
return LogResult(
|
return LogResult(
|
||||||
content=content,
|
content=content,
|
||||||
# Record provider+model so admin can see which path produced this row.
|
# Record provider+model so admin can see which path produced this row.
|
||||||
model=f"{provider}/{used_model}",
|
model=f"{provider}/{used_model}",
|
||||||
prompt_tokens=usage.get("prompt_tokens"),
|
prompt_tokens=prompt_tokens,
|
||||||
completion_tokens=usage.get("completion_tokens"),
|
completion_tokens=completion_tokens,
|
||||||
cost_usd=usage.get("cost") or usage.get("total_cost"),
|
cost_usd=cost_usd,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue