diff --git a/app/services/openrouter.py b/app/services/openrouter.py
index ca31f2f..598150c 100644
--- a/app/services/openrouter.py
+++ b/app/services/openrouter.py
@@ -136,10 +136,15 @@ async def _call_provider(
     messages: list[dict],
     model: str | None,
     max_tokens: int,
+    response_format: dict | None = None,
 ) -> LogResult:
     """One provider call with tenacity retries on transport/HTTP errors.
     Lives inside the retry decorator so retries happen within a provider,
-    not across the fallback chain."""
+    not across the fallback chain.
+
+    `response_format` is forwarded to the provider verbatim — DeepSeek and
+    OpenRouter both accept the OpenAI-shaped {"type": "json_object"} for
+    JSON-mode generation. None means free-form text."""
     url, api_key, default_model, extra_headers = _endpoint_for(provider)
     used_model = model or default_model
     headers = {
@@ -147,18 +152,22 @@ async def _call_provider(
         "Content-Type": "application/json",
         **extra_headers,
     }
-    r = await client.post(
-        url,
-        headers=headers,
-        json={"model": used_model, "messages": messages, "max_tokens": max_tokens},
-        timeout=180,
-    )
+    body: dict = {"model": used_model, "messages": messages, "max_tokens": max_tokens}
+    if response_format is not None:
+        body["response_format"] = response_format
+    r = await client.post(url, headers=headers, json=body, timeout=180)
     r.raise_for_status()
     data = r.json()
     msg = data["choices"][0]["message"]
-    # Some providers return null content + populated `reasoning` for thinking
-    # models, or null content when finish_reason=length cut off the response.
-    content = msg.get("content") or msg.get("reasoning")
+    # The `content` field is the model's user-facing answer. The optional
+    # `reasoning` field is the model's internal chain-of-thought — never
+    # safe to publish; it contains raw scratchpad ("Let's see…",
+    # mid-sentence question marks, planning notes). If `content` is empty
+    # (provider issue, finish_reason=length cutoff, or the model spent
+    # its budget on thinking), treat that as a generation failure and
+    # raise so the caller can retry or skip the row. Do NOT fall back to
+    # reasoning — see the 2026-05-29 valuation-read leak.
+    content = msg.get("content")
     if not content:
         finish = data["choices"][0].get("finish_reason")
         raise RuntimeError(
@@ -189,6 +198,7 @@ async def call_llm(
     messages: list[dict],
     model: str | None = None,
     max_tokens: int = 4000,
+    response_format: dict | None = None,
 ) -> LogResult:
     """Provider-aware chat completion with fallback. Tries primary
     (LLM_PROVIDER) first; if it raises after retries, falls through to
@@ -197,7 +207,11 @@ async def call_llm(
     The returned LogResult.model is prefixed with the provider that
     actually answered (e.g. ``deepseek/deepseek-v4-flash`` or
     ``openrouter/deepseek/deepseek-v4-flash``) — useful admin metadata
-    even though we hide it from the user-facing UI."""
+    even though we hide it from the user-facing UI.
+
+    Pass response_format={"type": "json_object"} to force JSON-mode
+    output (the model still needs to be instructed in the system prompt
+    to emit valid JSON — this flag enforces, not asks)."""
     chain = _provider_chain()
     if not chain:
         raise RuntimeError("No LLM provider configured (no API key set)")
@@ -207,6 +221,7 @@ async def call_llm(
         try:
             result = await _call_provider(
                 client, provider, messages, model, max_tokens,
+                response_format=response_format,
             )
             if i > 0:
                 from app.logging import get_logger
diff --git a/tests/test_openrouter_transport.py b/tests/test_openrouter_transport.py
index dfc14b0..e836044 100644
--- a/tests/test_openrouter_transport.py
+++ b/tests/test_openrouter_transport.py
@@ -183,10 +183,12 @@ async def test_call_llm_uses_upstream_cost_when_provided(monkeypatch):
 
 
 @pytest.mark.asyncio
-async def test_call_llm_falls_back_to_reasoning_field_when_content_null(monkeypatch):
-    """Thinking models sometimes return null `content` plus a populated
-    `reasoning` block — we surface the reasoning so the caller still gets
-    something usable rather than treating the row as empty."""
+async def test_call_llm_does_not_publish_reasoning_when_content_null(monkeypatch):
+    """The `reasoning` field is the model's internal chain-of-thought
+    (scratchpad: "Let's see…", planning notes, half-formed math). It is
+    never safe to surface as the user-facing answer — see the
+    2026-05-29 valuation-read leak. If `content` is null we treat the
+    row as a generation failure and raise; the caller can retry or skip."""
     _configure(monkeypatch, DEEPSEEK_API_KEY="sk-d", LLM_FALLBACK="")
 
     def handler(request: httpx.Request) -> httpx.Response:
@@ -199,8 +201,8 @@ async def test_call_llm_falls_back_to_reasoning_field_when_content_null(monkeypa
         })
 
     async with httpx.AsyncClient(transport=_mock_post(handler)) as client:
-        result = await ot.call_llm(client, [{"role": "user", "content": "hi"}])
-    assert result.content == "deep thought"
+        with pytest.raises(RuntimeError, match="LLM returned empty content"):
+            await ot.call_llm(client, [{"role": "user", "content": "hi"}])
 
 
 @pytest.mark.asyncio
@@ -228,7 +230,7 @@ async def test_call_llm_falls_back_to_secondary_when_primary_raises(monkeypatch)
         prompt_tokens=1, completion_tokens=2, cost_usd=0.0,
     )
 
-    async def fake(_client, provider, _messages, _model, _max_tokens):
+    async def fake(_client, provider, _messages, _model, _max_tokens, response_format=None):
         calls.append(provider)
         if provider == "deepseek":
             raise RuntimeError("primary down")
@@ -247,7 +249,7 @@ async def test_call_llm_raises_last_exception_when_chain_exhausted(monkeypatch):
     _configure(monkeypatch,
                DEEPSEEK_API_KEY="sk-d", OPENROUTER_API_KEY="sk-or")
 
-    async def fake(_client, provider, _messages, _model, _max_tokens):
+    async def fake(_client, provider, _messages, _model, _max_tokens, response_format=None):
         raise RuntimeError(f"{provider} broken")
 
     with patch.object(ot, "_call_provider", fake):