"""Transport-layer tests for app.services.openrouter. The companion file `test_openrouter_prompt.py` covers prompt building; this one covers the HTTP plumbing: provider chain selection, endpoint resolution, the per-call retry/parse path in `_call_provider`, and fallback behaviour in `call_llm`. Network requests are intercepted with ``httpx.MockTransport`` so nothing hits the wire. """ from __future__ import annotations import json from unittest.mock import patch import httpx import pytest from app.config import get_settings from app.services import openrouter as ot # --------------------------------------------------------------------------- # _estimate_cost_usd # --------------------------------------------------------------------------- def test_estimate_cost_known_model_uses_table_rates(): # deepseek-v4-flash table: 0.07/M input, 0.28/M output. # 1000 in + 2000 out = 0.000_07 + 0.000_56 = 0.000_63. cost = ot._estimate_cost_usd("deepseek-v4-flash", 1000, 2000) assert cost == pytest.approx(0.00063, rel=1e-9) def test_estimate_cost_handles_provider_prefixed_model_name(): # OpenRouter-style model strings use the slash-prefixed form. cost = ot._estimate_cost_usd("deepseek/deepseek-v4-flash", 1000, 2000) assert cost == pytest.approx(0.00063, rel=1e-9) def test_estimate_cost_unknown_model_returns_none(): assert ot._estimate_cost_usd("never-heard-of-this-model", 100, 200) is None def test_estimate_cost_missing_tokens_returns_none(): assert ot._estimate_cost_usd("deepseek-v4-flash", None, 200) is None assert ot._estimate_cost_usd("deepseek-v4-flash", 100, None) is None assert ot._estimate_cost_usd("deepseek-v4-flash", None, None) is None # --------------------------------------------------------------------------- # _provider_chain / llm_configured / active_model # --------------------------------------------------------------------------- def _configure(monkeypatch, **overrides): """Apply a small bundle of LLM settings for one test.""" s = get_settings() defaults = { "LLM_PROVIDER": "deepseek", "LLM_FALLBACK": "openrouter", "DEEPSEEK_API_KEY": "", "OPENROUTER_API_KEY": "", "DEEPSEEK_MODEL": "deepseek-v4-flash", "OPENROUTER_MODEL": "deepseek/deepseek-v4-flash", "DEEPSEEK_URL": "https://api.deepseek.com/chat/completions", } defaults.update(overrides) for k, v in defaults.items(): monkeypatch.setattr(s, k, v, raising=False) def test_provider_chain_drops_providers_without_keys(monkeypatch): _configure(monkeypatch, DEEPSEEK_API_KEY="sk-deepseek") # openrouter key missing assert ot._provider_chain() == ["deepseek"] assert ot.llm_configured() is True def test_provider_chain_lists_primary_then_fallback(monkeypatch): _configure(monkeypatch, DEEPSEEK_API_KEY="sk-deepseek", OPENROUTER_API_KEY="sk-openrouter") assert ot._provider_chain() == ["deepseek", "openrouter"] def test_provider_chain_skips_duplicate_when_primary_equals_fallback(monkeypatch): _configure(monkeypatch, LLM_FALLBACK="deepseek", DEEPSEEK_API_KEY="sk") assert ot._provider_chain() == ["deepseek"] def test_llm_configured_false_when_no_keys(monkeypatch): _configure(monkeypatch) # both keys empty assert ot.llm_configured() is False assert ot._provider_chain() == [] assert ot.active_model() == "unknown" def test_active_model_reflects_primary(monkeypatch): _configure(monkeypatch, LLM_PROVIDER="openrouter", OPENROUTER_API_KEY="sk-or", DEEPSEEK_API_KEY="") assert ot.active_model() == "deepseek/deepseek-v4-flash" # OPENROUTER_MODEL # --------------------------------------------------------------------------- # _endpoint_for # --------------------------------------------------------------------------- def test_endpoint_for_unknown_provider_raises(monkeypatch): _configure(monkeypatch, DEEPSEEK_API_KEY="sk") with pytest.raises(RuntimeError, match="Unknown LLM provider"): ot._endpoint_for("anthropic") def test_endpoint_for_provider_without_key_raises(monkeypatch): _configure(monkeypatch) # both keys empty with pytest.raises(RuntimeError, match="DEEPSEEK_API_KEY not set"): ot._endpoint_for("deepseek") with pytest.raises(RuntimeError, match="OPENROUTER_API_KEY not set"): ot._endpoint_for("openrouter") def test_endpoint_for_openrouter_includes_attribution_and_no_train_headers(monkeypatch): _configure(monkeypatch, OPENROUTER_API_KEY="sk-or") url, key, model, headers = ot._endpoint_for("openrouter") assert url.endswith("/chat/completions") assert key == "sk-or" assert headers["X-OR-Allow-Training"] == "false" assert "HTTP-Referer" in headers and "X-Title" in headers # --------------------------------------------------------------------------- # _call_provider (through call_llm so retry doesn't fire — happy paths only) # --------------------------------------------------------------------------- def _mock_post(callback): """Wrap a callback into an httpx.MockTransport. Callback receives the request and returns either an httpx.Response or raises.""" return httpx.MockTransport(callback) @pytest.mark.asyncio async def test_call_llm_returns_parsed_log_result(monkeypatch): _configure(monkeypatch, DEEPSEEK_API_KEY="sk-deepseek", LLM_FALLBACK="") def handler(request: httpx.Request) -> httpx.Response: body = json.loads(request.content.decode()) assert body["model"] == "deepseek-v4-flash" return httpx.Response(200, json={ "choices": [{"message": {"content": "hello"}, "finish_reason": "stop"}], "usage": {"prompt_tokens": 100, "completion_tokens": 200}, }) async with httpx.AsyncClient(transport=_mock_post(handler)) as client: result = await ot.call_llm(client, [{"role": "user", "content": "hi"}]) assert result.content == "hello" # Model is prefixed with the answering provider for ledger traceability. assert result.model == "deepseek/deepseek-v4-flash" assert result.prompt_tokens == 100 assert result.completion_tokens == 200 # DeepSeek doesn't return cost — estimated from tokens. # 100 * 0.07 + 200 * 0.28 = 7 + 56 = 63 → 0.000063. assert result.cost_usd == pytest.approx(0.000063, rel=1e-9) @pytest.mark.asyncio async def test_call_llm_uses_upstream_cost_when_provided(monkeypatch): """When the upstream supplies usage.cost (OpenRouter), we trust it and skip the per-model table estimate.""" _configure(monkeypatch, LLM_PROVIDER="openrouter", OPENROUTER_API_KEY="sk-or", LLM_FALLBACK="") def handler(request: httpx.Request) -> httpx.Response: return httpx.Response(200, json={ "choices": [{"message": {"content": "ok"}, "finish_reason": "stop"}], "usage": {"prompt_tokens": 50, "completion_tokens": 50, "cost": 0.0042}, }) async with httpx.AsyncClient(transport=_mock_post(handler)) as client: result = await ot.call_llm(client, [{"role": "user", "content": "hi"}]) assert result.cost_usd == 0.0042 @pytest.mark.asyncio async def test_call_llm_does_not_publish_reasoning_when_content_null(monkeypatch): """The `reasoning` field is the model's internal chain-of-thought (scratchpad: "Let's see…", planning notes, half-formed math). It is never safe to surface as the user-facing answer — see the 2026-05-29 valuation-read leak. If `content` is null we treat the row as a generation failure and raise; the caller can retry or skip.""" _configure(monkeypatch, DEEPSEEK_API_KEY="sk-d", LLM_FALLBACK="") def handler(request: httpx.Request) -> httpx.Response: return httpx.Response(200, json={ "choices": [{ "message": {"content": None, "reasoning": "deep thought"}, "finish_reason": "stop", }], "usage": {"prompt_tokens": 10, "completion_tokens": 20}, }) async with httpx.AsyncClient(transport=_mock_post(handler)) as client: with pytest.raises(RuntimeError, match="LLM returned empty content"): await ot.call_llm(client, [{"role": "user", "content": "hi"}]) @pytest.mark.asyncio async def test_call_llm_raises_when_no_provider_configured(monkeypatch): _configure(monkeypatch) # both keys empty async with httpx.AsyncClient() as client: with pytest.raises(RuntimeError, match="No LLM provider configured"): await ot.call_llm(client, [{"role": "user", "content": "hi"}]) # --------------------------------------------------------------------------- # call_llm fallback chain — patch _call_provider to bypass the retry/sleep # decorator and exercise the cross-provider failover logic directly. # --------------------------------------------------------------------------- @pytest.mark.asyncio async def test_call_llm_falls_back_to_secondary_when_primary_raises(monkeypatch): _configure(monkeypatch, DEEPSEEK_API_KEY="sk-d", OPENROUTER_API_KEY="sk-or") calls = [] success = ot.LogResult( content="from-fallback", model="openrouter/deepseek/deepseek-v4-flash", prompt_tokens=1, completion_tokens=2, cost_usd=0.0, ) async def fake(_client, provider, _messages, _model, _max_tokens, response_format=None): calls.append(provider) if provider == "deepseek": raise RuntimeError("primary down") return success with patch.object(ot, "_call_provider", fake): async with httpx.AsyncClient() as client: result = await ot.call_llm(client, [{"role": "user", "content": "hi"}]) assert calls == ["deepseek", "openrouter"] assert result.content == "from-fallback" @pytest.mark.asyncio async def test_call_llm_raises_last_exception_when_chain_exhausted(monkeypatch): _configure(monkeypatch, DEEPSEEK_API_KEY="sk-d", OPENROUTER_API_KEY="sk-or") async def fake(_client, provider, _messages, _model, _max_tokens, response_format=None): raise RuntimeError(f"{provider} broken") with patch.object(ot, "_call_provider", fake): async with httpx.AsyncClient() as client: with pytest.raises(RuntimeError, match="openrouter broken"): await ot.call_llm(client, [{"role": "user", "content": "hi"}])