diff --git a/app/jobs/indicator_summary_job.py b/app/jobs/indicator_summary_job.py
index 97c5f80..422c49c 100644
--- a/app/jobs/indicator_summary_job.py
+++ b/app/jobs/indicator_summary_job.py
@@ -4,7 +4,7 @@ hourly stays comfortably under the monthly cap."""
 from __future__ import annotations
 
 import asyncio
-import re
+import json
 
 import httpx
 from sqlalchemy import desc, func, select
@@ -35,6 +35,7 @@ from app.services.openrouter import (
     llm_configured,
     month_start,
 )
+from app.services.output_review import review_read
 from app.services.translation import translate
 
 
@@ -106,109 +107,41 @@ async def translate_summary_for_active_languages(session, summary_id: int) -> No
                  summary_id=summary_id, succeeded=succeeded, failed=failed)
 
 
-# Strip known meta-commentary openers the model sometimes leaks despite the
-# prompt's hard constraints. Each pattern matches one leading sentence.
-_LEAK_PATTERNS = [
-    re.compile(p, re.IGNORECASE | re.DOTALL)
-    for p in (
-        # First-person meta — "I need to / I'll / I have to / I'm going to ..."
-        r"^i\s+(?:need|have|must|should|am going|'ll|will|shall|can|am)[^.]*\.\s*",
-        # "We need / we're / we are asked / we will ..."
-        r"^we\s+(?:need|are|'re|will|shall|can|should|must|have)[^.]*\.\s*",
-        r"^let\s+(?:me|us|'?s)[^.]*\.\s*",
-        r"^here['’]s[^.]*\.\s*",
-        r"^sure[,!]?\s[^.]*\.\s*",
-        r"^looking at[^.]*\.\s*",
-        r"^based on[^.]*\.\s*",
-        r"^to (?:address|answer|write|summarise|summarize)[^.]*\.\s*",
-        r"^first[,]?\s[^.]*\.\s*",
-        r"^the (?:user|data shows|reader|task|request|reader sees|instructions?)[^.]*\.\s*",
-        r"^summary[:.]\s*",
-        r"^key\s*[:\-—]\s*",
-        r"^must\s+(?:be|cite|explain|avoid|give|stay|provide)[^.]*\.\s*",
-        r"^should\s+(?:be|give|cite|explain|avoid|provide)[^.]*\.\s*",
-        r"^avoid[^.]*\.\s*",
-        r"^cite\s+at\s+most[^.]*\.\s*",
-        r"^be\s+(?:speculative|specific|concise|brief)[^.]*\.\s*",
-        r"^stay\s+on[^.]*\.\s*",
-        r"^okay[,]?\s+",
-        r"^alright[,]?\s+",
-        r"^thinking[^.]*\.\s*",
-        # Prompt-leak prefixes — the model echoes example framing or rule
-        # headers from the system prompt.
-        r"^(?:good|bad|positive|negative)\s+example\s*[:\-—]\s*",
-        r"^example\s+(?:good|bad)\s*[:\-—]\s*",
-        r"^example\s*[:\-—]\s*",
-        r"^reference\s+style\s*[:\-—]\s*",
-        # Prompt label echoes (markdown-style or plain-text)
-        r"^(?:hard\s+)?constraints?\s*[:\-—][^.\n]*[.\n]\s*",
-        r"^key\s+observations?\s*[:\-—]\s*",
-        r"^observations?\s*[:\-—]\s*",
-        r"^focus\s+on[^.]*\.\s*",
-        r"^output\s+the\s+read[^.]*\.\s*",
-        r"^plain\s+prose[^.]*\.\s*",
-        r"^the\s+indicators?[^.]*\.\s*",   # "The indicators include..." / "The indicators are..."
-        r"^indicators?\s*[:\-—]\s*",
-        r"^data\s*[:\-—]\s*",
-        r"^analysis\s*[:\-—]\s*",
-        r"^interpretation\s*[:\-—]\s*",
-        r"^read\s*[:\-—]\s*",
-        r"^note\s*[:\-—]\s*",
-        # Sometimes the response gets wrapped in literal quotes
-        r"^[\"“'`]+",
-    )
-]
+# Defence-in-depth: read generation goes through JSON mode + a reviewer.
+#
+# 1. The system prompt instructs the model to emit {"read": "..."} only;
+#    response_format={"type":"json_object"} asks the API for well-formed
+#    JSON, and _extract_read drops any body that still fails to parse.
+# 2. We extract `read`, then ask a second LLM call (services/output_review)
+#    whether the candidate text is publishable. Scratchpad INSIDE the
+#    field — "Let's see…", "X? Actually Y?" — is caught here.
+# 3. Any failure at either stage (parse, missing field, reviewer veto,
+#    reviewer error) drops the candidate. The previous good
+#    IndicatorSummary stays visible.
+#
+# The old _LEAK_PATTERNS / clean_summary / looks_like_leakage regex
+# scaffolding lived here previously. It produced false positives (e.g.
+# chopping off a legitimate leading sentence like "The indicators are
+# pricing…") and false negatives (it never caught the chain-of-thought
+# patterns the model actually emits). The reviewer agent replaces it.
 
 
-_TRAILING_QUOTE = re.compile(r"[\"”'`]+\s*$")
-
-# Tell-tale phrases that mean the model regurgitated the prompt as its
-# "answer" — we'd rather show nothing than show this.
-_LEAKAGE_FLAGS = (
-    "≤60 words", "60 words", "must be under", "must cite", "must explain",
-    "no meta-commentary", "no buy/sell", "horizon. ", "1-day moves",
-    "the instructions are", "instructions:", "constraints:", "hard constraints",
-    "good example", "bad example", "reference style",
-)
-
-
-def looks_like_leakage(text: str) -> bool:
-    """Heuristic: after cleaning, if these phrases still appear, the output
-    is contaminated prompt-regurgitation and shouldn't be shown."""
-    low = text.lower()
-    return any(flag in low for flag in _LEAKAGE_FLAGS)
-
-
-def clean_summary(text: str) -> str:
-    """Strip leading meta-commentary. If cleaning removes nearly everything
-    (suggesting the model emitted reasoning then ran out of tokens), fall
-    back to the last non-empty paragraph of the raw output — that's usually
-    where the actual answer ended up."""
-    raw = text.strip()
-    out = raw
-    # Up to 6 passes: handles compound leakage like
-    # "Constraints: <...>. The indicators are: <...>. <actual answer>"
-    for _ in range(6):
-        before = out
-        for pat in _LEAK_PATTERNS:
-            out = pat.sub("", out, count=1).lstrip()
-        if out == before:
-            break
-    if len(out) < 60 and len(raw) > 120:
-        # Cleaning ate too much; take the last non-empty paragraph of raw.
-        paragraphs = [p.strip() for p in re.split(r"\n\s*\n", raw) if p.strip()]
-        if paragraphs:
-            out = paragraphs[-1]
-            # Re-strip leaders from the recovered paragraph too.
-            for _ in range(2):
-                before = out
-                for pat in _LEAK_PATTERNS:
-                    out = pat.sub("", out, count=1).lstrip()
-                if out == before:
-                    break
-    # Trim any orphan closing quote/backtick from the wrap-strip above.
-    out = _TRAILING_QUOTE.sub("", out).rstrip()
-    return out
+def _extract_read(raw: str) -> str | None:
+    """Return the stripped "read" string from the model's JSON envelope.
+
+    Returns None on any schema deviation: unparseable or non-text body,
+    non-object JSON, or a "read" that is missing, non-string or blank."""
+    try:
+        envelope = json.loads(raw)
+    except (TypeError, ValueError):
+        # TypeError: the body was None rather than text. ValueError is
+        # the base of json.JSONDecodeError (malformed / truncated JSON).
+        return None
+    if not isinstance(envelope, dict):
+        return None
+    text = envelope.get("read")
+    # Blank-after-strip collapses to None so callers test one failure value.
+    return (text.strip() or None) if isinstance(text, str) else None
 
 
 
@@ -228,19 +161,20 @@ async def _generate_one(
             [{"role": "system", "content": system_prompt},
              {"role": "user",   "content": user_prompt}],
             max_tokens=800,  # DeepSeek sometimes spends 300+ on internal reasoning
+            response_format={"type": "json_object"},
         )
     except Exception as e:
         session.add(AICall(model=active_model(), status="error", error=str(e)[:500]))
         log.warning("ind_summary.failed", group=group, error=str(e)[:120])
         return None
 
-    cleaned = clean_summary(result.content)
-    if looks_like_leakage(cleaned) or len(cleaned) < 40:
-        # Model regurgitated the prompt or produced nothing usable.
-        # Don't persist — keep the last good summary visible. Log it so
-        # we can see the rate of failures over time.
-        log.warning("ind_summary.leakage_detected",
-                    group=group, preview=cleaned[:120])
+    candidate = _extract_read(result.content)
+    if candidate is None or len(candidate) < 40:
+        # JSON envelope malformed, "read" field missing/wrong type, or
+        # the candidate is too short to be a real read. Don't persist;
+        # the last good summary stays visible.
+        log.warning("ind_summary.json_invalid",
+                    group=group, preview=result.content[:160])
         session.add(AICall(
             model=result.model,
             prompt_tokens=result.prompt_tokens,
@@ -250,6 +184,23 @@ async def _generate_one(
         ))
         return None
 
+    verdict = await review_read(client, candidate)
+    if not verdict.clean:
+        # Reviewer caught scratchpad / meta-commentary / partial text
+        # INSIDE the read field. Drop the candidate; the previous good
+        # summary continues to serve.
+        log.warning("ind_summary.reviewer_rejected",
+                    group=group, reason=verdict.reason,
+                    preview=candidate[:120])
+        session.add(AICall(
+            model=result.model,
+            prompt_tokens=result.prompt_tokens,
+            completion_tokens=result.completion_tokens,
+            cost_usd=(result.cost_usd or 0.0) + (verdict.cost_usd or 0.0),
+            status="leaked",
+        ))
+        return None
+
     summary = IndicatorSummary(
         group_name=group,
         generated_at=utcnow(),
@@ -257,17 +208,19 @@ async def _generate_one(
         tone=tone,
         analysis=analysis,
         prompt_version=PROMPT_VERSION,
-        content=cleaned,
+        content=candidate,
         prompt_tokens=result.prompt_tokens,
         completion_tokens=result.completion_tokens,
-        cost_usd=result.cost_usd,
+        # Include the reviewer's cost in the row's recorded spend so the
+        # monthly budget tracking covers the full pipeline cost.
+        cost_usd=(result.cost_usd or 0.0) + (verdict.cost_usd or 0.0),
     )
     session.add(summary)
     session.add(AICall(
         model=result.model,
         prompt_tokens=result.prompt_tokens,
         completion_tokens=result.completion_tokens,
-        cost_usd=result.cost_usd,
+        cost_usd=(result.cost_usd or 0.0) + (verdict.cost_usd or 0.0),
         status="ok",
     ))
     return summary
@@ -338,6 +291,7 @@ async def run() -> None:
                         await translate_summary_for_active_languages(session, summary.id)
 
                 # One aggregate read across all groups, stored under __all__.
+                # Same JSON-mode + reviewer-agent path as per-group reads.
                 agg_system = build_aggregate_summary_system_prompt(tone, analysis)
                 agg_user = build_aggregate_summary_user_prompt(groups)
                 agg_summary: IndicatorSummary | None = None
@@ -346,28 +300,53 @@ async def run() -> None:
                         client,
                         [{"role": "system", "content": agg_system},
                          {"role": "user", "content": agg_user}],
-                        max_tokens=1500,  # room for reasoning + 80-word output
+                        max_tokens=1500,
+                        response_format={"type": "json_object"},
                     )
-                    agg_summary = IndicatorSummary(
-                        group_name=AGGREGATE_GROUP_NAME,
-                        generated_at=utcnow(),
-                        model=result.model,
-                        tone=tone,
-                        analysis=analysis,
-                        prompt_version=PROMPT_VERSION,
-                        content=clean_summary(result.content),
-                        prompt_tokens=result.prompt_tokens,
-                        completion_tokens=result.completion_tokens,
-                        cost_usd=result.cost_usd,
-                    )
-                    session.add(agg_summary)
-                    session.add(AICall(
-                        model=result.model,
-                        prompt_tokens=result.prompt_tokens,
-                        completion_tokens=result.completion_tokens,
-                        cost_usd=result.cost_usd, status="ok",
-                    ))
-                    written += 1
+                    candidate = _extract_read(result.content)
+                    if candidate is None or len(candidate) < 40:
+                        log.warning("ind_summary.agg_json_invalid",
+                                    tone=tone, preview=result.content[:160])
+                        session.add(AICall(
+                            model=result.model,
+                            prompt_tokens=result.prompt_tokens,
+                            completion_tokens=result.completion_tokens,
+                            cost_usd=result.cost_usd, status="leaked",
+                        ))
+                    else:
+                        verdict = await review_read(client, candidate)
+                        full_cost = (result.cost_usd or 0.0) + (verdict.cost_usd or 0.0)
+                        if not verdict.clean:
+                            log.warning("ind_summary.agg_reviewer_rejected",
+                                        tone=tone, reason=verdict.reason,
+                                        preview=candidate[:120])
+                            session.add(AICall(
+                                model=result.model,
+                                prompt_tokens=result.prompt_tokens,
+                                completion_tokens=result.completion_tokens,
+                                cost_usd=full_cost, status="leaked",
+                            ))
+                        else:
+                            agg_summary = IndicatorSummary(
+                                group_name=AGGREGATE_GROUP_NAME,
+                                generated_at=utcnow(),
+                                model=result.model,
+                                tone=tone,
+                                analysis=analysis,
+                                prompt_version=PROMPT_VERSION,
+                                content=candidate,
+                                prompt_tokens=result.prompt_tokens,
+                                completion_tokens=result.completion_tokens,
+                                cost_usd=full_cost,
+                            )
+                            session.add(agg_summary)
+                            session.add(AICall(
+                                model=result.model,
+                                prompt_tokens=result.prompt_tokens,
+                                completion_tokens=result.completion_tokens,
+                                cost_usd=full_cost, status="ok",
+                            ))
+                            written += 1
                 except Exception as e:
                     session.add(AICall(
                         model=active_model(), status="error",
diff --git a/app/services/llm_prompts.py b/app/services/llm_prompts.py
index 9840ec2..726b60a 100644
--- a/app/services/llm_prompts.py
+++ b/app/services/llm_prompts.py
@@ -296,12 +296,25 @@ question via the chat sidebar.
 def build_summary_system_prompt(tone: str, analysis: str) -> str:
     """A lean, focused system prompt for the per-indicator-group hourly
     summary. INTERPRETATION not description — the reader has the table
-    next to this paragraph; they don't need numbers recited at them."""
+    next to this paragraph; they don't need numbers recited at them.
+
+    Output is JSON-mode: the model must emit a single object
+    {"read": "..."}. The wrapper makes scratchpad outside the field
+    physically impossible — the API enforces well-formed JSON, and the
+    only schema slot is the publishable read. Scratchpad inside the
+    field is caught by the reviewer agent (services/output_review)."""
     tone_block = _TONE[_resolve_tone(tone)]
     analysis_block = _ANALYSIS.get(analysis.upper(), _ANALYSIS["SPECULATIVE"])
     return f"""You write a TINY interpretation (≤60 words, 2-3 sentences) \
 of ONE indicator group for a strategic markets dashboard.
 
+# Output format (strict)
+Return ONLY a single JSON object with exactly one field:
+{{"read": "<your 2-3 sentence interpretation>"}}
+Nothing outside that JSON object. No preamble. No markdown fences. \
+No additional fields. The "read" string is what the user sees verbatim, \
+so it must already be the finished, publishable text — never your thinking.
+
 # What this is for
 The reader is looking at the table of numbers right next to your text. \
 They can see the values. They CANNOT see the meaning. Your job is to \
@@ -316,19 +329,20 @@ Even at 2-3 sentences, contrast what the underlying factors justify \
 they don't diverge, say so in one clause. Never just describe the move \
 without placing it on this axis.
 
-# Hard constraints
+# Hard constraints on the "read" string
 - Plain prose, ONE paragraph. No markdown, no headers, no lists, no labels.
 - Open IMMEDIATELY with substance. NEVER start with: "I need to", "I'll", \
 "We need to", "We are asked", "Here's", "Let me", "Let's", "Sure", "Looking \
 at", "Based on", "Summary:", "The data shows", "First", "To address". No \
 meta-commentary at all.
+- No rhetorical questions, no "X? Actually Y?" self-corrections, no \
+parenthetical asides that question your own numbers. The text is the \
+finished read, not the thinking.
 - Cite at most 2-3 specific numbers and ONLY when they anchor an \
 interpretation. Don't list moves; explain them.
 - Multi-week / multi-month horizon. 1-day moves under 2% are noise — skip.
 - No buy/sell language. No predictions. No watch list. No TL;DR. No date \
 header. No "system temperature" line — that belongs to the full daily log.
-- Output the read directly. Do NOT include phrases like "Example", "Good \
-example", "Bad example", "Reference", or any meta-framing of your output.
 
 {tone_block}
 
@@ -350,13 +364,22 @@ def build_summary_user_prompt(group_name: str, quotes: list[dict]) -> str:
 
 def build_aggregate_summary_system_prompt(tone: str, analysis: str) -> str:
     """System prompt for the cross-group aggregate read shown on the dashboard.
-    Wider lens than a per-group summary — synthesise across all groups."""
+    Wider lens than a per-group summary — synthesise across all groups.
+
+    Same JSON-mode contract as build_summary_system_prompt: output is
+    {"read": "..."} only; the field is the publishable text verbatim."""
     tone_block = _TONE[_resolve_tone(tone)]
     analysis_block = _ANALYSIS.get(analysis.upper(), _ANALYSIS["SPECULATIVE"])
     return f"""You write a single SHORT cross-asset INTERPRETATION (≤80 \
 words, 2-4 sentences) for the dashboard header. The reader is glancing — \
 give them the meaning of the whole tape, not a recap.
 
+# Output format (strict)
+Return ONLY a single JSON object with exactly one field:
+{{"read": "<your 2-4 sentence cross-asset interpretation>"}}
+Nothing outside that JSON object. No preamble. No markdown fences. \
+No additional fields. The "read" string is what the user sees verbatim.
+
 # What this is for
 The reader can see every indicator on the dashboard below this paragraph. \
 Your job is NOT to summarise the moves. It is to explain what the moves, \
@@ -371,19 +394,19 @@ crowd is actually doing (irrational: positioning, narrative momentum, \
 flows). At least one of the 2-4 sentences must name this gap or, if the \
 two cohere, explicitly say so.
 
-# Hard constraints
+# Hard constraints on the "read" string
 - Plain prose, ONE paragraph. No markdown, headers, lists, or labels.
 - Open IMMEDIATELY with substance. NEVER start with: "I need to", "I'll", \
 "We need to", "Here's", "Let me", "Looking at", "Based on", "Sure", "Summary:", \
 "The data shows", "Across the board". No meta-commentary.
+- No rhetorical questions, no "X? Actually Y?" self-corrections, no \
+parenthetical asides that question your own numbers.
 - Identify the single most important **cross-asset implication**: e.g. \
 "rates and credit disagree", "equities outrun fundamentals", "geopolitical \
 risk premium is in commodities but not vol". Cite no more than 3 specific \
 numbers, and only as anchors for the interpretation.
 - Multi-week / multi-month horizon. 1-day moves under 2% are noise.
 - No buy/sell language. No predictions of specific levels.
-- Output the read directly. Do NOT include phrases like "Example", "Good \
-example", "Bad example", "Reference", or any meta-framing of your output.
 
 {tone_block}
 
diff --git a/app/services/output_review.py b/app/services/output_review.py
new file mode 100644
index 0000000..3af2a7a
--- /dev/null
+++ b/app/services/output_review.py
@@ -0,0 +1,107 @@
+"""Second-pass reviewer agent for AI-generated reads.
+
+The per-group and aggregate indicator summaries are generated in JSON
+mode and the publishable text comes out of a single "read" field, but a
+misbehaving model can still slip chain-of-thought INSIDE the field
+("Let's see…", "X? Actually Y?", multi-question parentheticals). This
+module makes a small second LLM call that judges the candidate read as
+clean / unclean. Cost is ~$0.0001 per check; latency ~1-2 s in the
+hourly job. No user-facing latency.
+
+The reviewer is deliberately a tiny, JSON-shaped classifier — same
+JSON-mode mechanism as the generator, so the verdict can't be lost in
+prose. If parsing fails or the call errors, the row is rejected
+(fail-safe: the previously cached good summary stays visible).
+"""
+from __future__ import annotations
+
+import json
+from dataclasses import dataclass
+
+import httpx
+
+from app.logging import get_logger
+from app.services.openrouter import call_llm
+
+log = get_logger("output_review")
+
+# Reviewer rubric; the reply shape it demands is what review_read parses.
+_SYSTEM_PROMPT = """\
+You are a strict editor for a financial-markets dashboard. The author
+was asked to produce a short interpretive read for human readers.
+You receive their proposed read and decide if it is publishable as-is.
+
+Mark CLEAN only if the text reads like a finished interpretation a
+reader could see on a public dashboard without confusion.
+
+Mark UNCLEAN if the text contains ANY of:
+- Chain-of-thought / scratchpad markers used as thinking — phrases like
+  "Let me", "Let's see", "we need to", "actually" (correcting itself),
+  "wait", "hmm", "or rather", "I should".
+- Self-questioning parentheticals: "Q1 2026? Actually Q4 2025?",
+  "is it X or Y?", any place where the author appears to be working
+  out the answer in front of the reader.
+- Multiple rhetorical questions or any question that interrupts the
+  declarative voice. A clean interpretive read is assertive.
+- Meta-commentary about the task, output format, word limits, or
+  instructions — e.g. "as required by the constraints", "the prompt
+  asks", "let me address each".
+- Partial / truncated content. Starts mid-word, mid-number, mid-clause.
+- Visible internal numbers without clear meaning ("change 1y +5.9%?"),
+  raw column names ("as_of 2026-01-01"), or any debug-like fragments.
+- Anything other than the finished, publishable interpretation.
+
+Return ONLY a JSON object with this exact shape:
+{"clean": true | false, "reason": "<≤20 words, plain text>"}
+No preamble, no markdown fences, no other fields.
+"""
+
+
+@dataclass(frozen=True)
+class Verdict:
+    clean: bool  # True only when the reviewer's JSON said "clean": true
+    reason: str  # reviewer's explanation, or a local failure label
+    cost_usd: float | None  # cost of the review call itself, for the ledger
+
+
+async def review_read(client: httpx.AsyncClient, candidate: str) -> Verdict:
+    """Ask the LLM whether `candidate` is a publishable read.
+
+    Returns Verdict(clean, reason, cost). Any failure — provider error,
+    unparseable body, non-object JSON, missing or non-bool "clean" —
+    yields a CONSERVATIVE verdict (clean=False) instead of raising, so
+    the caller simply drops the candidate."""
+    if not candidate or not candidate.strip():
+        return Verdict(clean=False, reason="empty candidate", cost_usd=0.0)
+
+    messages = [
+        {"role": "system", "content": _SYSTEM_PROMPT},
+        # Fenced user turn: keeps the candidate from being read as
+        # instructions even if it contains prompt-like prose.
+        {"role": "user", "content": f"Candidate read:\n```\n{candidate}\n```"},
+    ]
+    try:
+        result = await call_llm(
+            client, messages,
+            max_tokens=120,
+            response_format={"type": "json_object"},
+        )
+    except Exception as e:
+        log.warning("review.call_failed", error=str(e)[:200])
+        return Verdict(clean=False, reason=f"reviewer error: {str(e)[:80]}",
+                       cost_usd=None)
+
+    try:
+        parsed = json.loads(result.content)
+    except (TypeError, ValueError):  # None content, or malformed JSON
+        log.warning("review.parse_failed", preview=str(result.content)[:200])
+        return Verdict(clean=False, reason="reviewer returned non-JSON",
+                       cost_usd=result.cost_usd)
+
+    # A list/string/number is valid JSON but has no .get(): no verdict.
+    clean = parsed.get("clean") if isinstance(parsed, dict) else None
+    if not isinstance(clean, bool):
+        return Verdict(clean=False, reason="reviewer omitted bool 'clean'",
+                       cost_usd=result.cost_usd)
+    reason = str(parsed.get("reason") or "")[:200]
+    return Verdict(clean=clean, reason=reason, cost_usd=result.cost_usd)
diff --git a/tests/test_output_review.py b/tests/test_output_review.py
new file mode 100644
index 0000000..53f0b34
--- /dev/null
+++ b/tests/test_output_review.py
@@ -0,0 +1,146 @@
+"""Tests for the JSON-envelope extractor and the reviewer agent.
+
+The two together replaced the regex `clean_summary` + `looks_like_leakage`
+scaffolding that used to live in indicator_summary_job. The extractor is
+pure-function so it's covered exhaustively; the reviewer makes an LLM
+call and is exercised via the httpx MockTransport that the other
+openrouter tests use."""
+from __future__ import annotations
+
+import httpx
+import pytest
+
+from app.jobs.indicator_summary_job import _extract_read
+from app.services import openrouter as ot
+from app.services.output_review import review_read
+
+
+# ---------------------------------------------------------------------------
+# _extract_read — JSON envelope handling
+# ---------------------------------------------------------------------------
+
+
+def test_extract_read_returns_trimmed_field():
+    expected = "The market is pricing growth."
+    assert _extract_read('{"read": "  ' + expected + '  "}') == expected
+
+
+def test_extract_read_returns_none_on_invalid_json():
+    """Unparseable bodies (prose, broken braces, empty) are dropped."""
+    for body in ("not json", "{bad}", ""):
+        assert _extract_read(body) is None
+
+
+def test_extract_read_returns_none_when_field_missing():
+    assert _extract_read('{"other": "x"}') is None  # object without "read"
+
+
+def test_extract_read_returns_none_when_field_not_string():
+    """A number, null, or array in "read" is never coerced to text."""
+    for value in ("42", "null", '["a","b"]'):
+        assert _extract_read('{"read": %s}' % value) is None
+
+
+def test_extract_read_returns_none_when_field_empty():
+    for blank in ("", "   "):  # empty and whitespace-only both collapse
+        assert _extract_read('{"read": "%s"}' % blank) is None
+
+
+def test_extract_read_returns_none_when_envelope_not_object():
+    """Valid JSON that is a bare string or array is not the envelope."""
+    for body in ('"just a string"', '["a", "b"]'):
+        assert _extract_read(body) is None
+
+
+# ---------------------------------------------------------------------------
+# review_read — judges candidate read via a second LLM call
+# ---------------------------------------------------------------------------
+
+
+def _mock_post(handler):
+    return httpx.MockTransport(handler)  # requests are answered by `handler`
+
+
+def _configure(monkeypatch):
+    """Patch get_settings so call_llm believes a provider is configured."""
+    fake = type("S", (), dict(
+        LLM_PROVIDER="deepseek", LLM_FALLBACK="",
+        DEEPSEEK_API_KEY="sk-d", OPENROUTER_API_KEY="",
+        DEEPSEEK_URL="https://x/deepseek", DEEPSEEK_MODEL="deepseek-v4-flash",
+        OPENROUTER_URL="https://x/or", OPENROUTER_MODEL="deepseek/deepseek-v4-flash"))
+    monkeypatch.setattr(ot, "get_settings", lambda: fake())
+
+
+@pytest.mark.asyncio
+async def test_review_clean_verdict(monkeypatch):
+    """An explicit clean=true reply is passed through with its cost."""
+    _configure(monkeypatch)
+    reply = {"message": {"content": '{"clean": true, "reason": "ok"}'},
+             "finish_reason": "stop"}
+    usage = {"prompt_tokens": 50, "completion_tokens": 12, "cost": 0.00007}
+    transport = _mock_post(
+        lambda _req: httpx.Response(200, json={"choices": [reply], "usage": usage}))
+    async with httpx.AsyncClient(transport=transport) as client:
+        verdict = await review_read(client, "Markets are pricing tighter policy.")
+    assert verdict.clean is True
+    assert verdict.cost_usd == 0.00007
+
+
+@pytest.mark.asyncio
+async def test_review_unclean_verdict(monkeypatch):
+    """A clean=false reply rejects the read and surfaces the reason."""
+    _configure(monkeypatch)
+    content = '{"clean": false, "reason": "chain of thought"}'
+    body = {
+        "choices": [{"message": {"content": content}, "finish_reason": "stop"}],
+        "usage": {"prompt_tokens": 50, "completion_tokens": 14, "cost": 0.00009},
+    }
+    transport = _mock_post(lambda _req: httpx.Response(200, json=body))
+    async with httpx.AsyncClient(transport=transport) as client:
+        verdict = await review_read(client, "Let's see, is it X? Actually Y?")
+    assert verdict.clean is False
+    assert "chain of thought" in verdict.reason
+
+
+@pytest.mark.asyncio
+async def test_review_failsafe_on_malformed_json(monkeypatch):
+    """A prose reply from the reviewer is a conservative reject."""
+    _configure(monkeypatch)
+    body = {
+        "choices": [{"message": {"content": "yes it looks clean"},
+                     "finish_reason": "stop"}],
+        "usage": {"prompt_tokens": 50, "completion_tokens": 6},
+    }
+    transport = _mock_post(lambda _req: httpx.Response(200, json=body))
+    async with httpx.AsyncClient(transport=transport) as client:
+        verdict = await review_read(client, "Some candidate.")
+    assert verdict.clean is False
+    assert "non-JSON" in verdict.reason
+
+
+@pytest.mark.asyncio
+async def test_review_failsafe_on_missing_clean_field(monkeypatch):
+    """A JSON reply without a "clean" key is a conservative reject."""
+    _configure(monkeypatch)
+    reply = {"message": {"content": '{"reason": "no field"}'},
+             "finish_reason": "stop"}
+    usage = {"prompt_tokens": 50, "completion_tokens": 6}
+    transport = _mock_post(
+        lambda _req: httpx.Response(200, json={"choices": [reply], "usage": usage}))
+    async with httpx.AsyncClient(transport=transport) as client:
+        verdict = await review_read(client, "Some candidate.")
+    assert verdict.clean is False
+
+
+@pytest.mark.asyncio
+async def test_review_failsafe_on_empty_candidate(monkeypatch):
+    """A blank candidate is rejected locally, without any HTTP request."""
+    _configure(monkeypatch)
+    seen = []
+    def handler(request):
+        seen.append(request)
+        return httpx.Response(500, json={"error": "should not be called"})
+    async with httpx.AsyncClient(transport=_mock_post(handler)) as client:
+        verdict = await review_read(client, "   ")
+    assert verdict.clean is False
+    assert not seen