diff --git a/app/services/output_review.py b/app/services/output_review.py
index 3af2a7a..cdf545d 100644
--- a/app/services/output_review.py
+++ b/app/services/output_review.py
@@ -84,7 +84,15 @@ async def review_read(client: httpx.AsyncClient, candidate: str) -> Verdict:
     try:
         result = await call_llm(
             client, messages,
-            max_tokens=120,
+            # 300 tokens is comfortably above the 30-token JSON verdict
+            # the prompt asks for. An earlier 120-token cap was producing
+            # frequent finish_reason=length cutoffs that left the JSON
+            # half-written ('{"clean": false, "reason": "Text…'), which
+            # the parser then rejected as malformed — a false-negative
+            # in the verdict. The extra headroom costs ~$0.00015 per
+            # call (DeepSeek output rates) and removes that whole class
+            # of artefact.
+            max_tokens=300,
             response_format={"type": "json_object"},
         )
     except Exception as e: