diff --git a/app/services/output_review.py b/app/services/output_review.py
index cdf545d..f228a74 100644
--- a/app/services/output_review.py
+++ b/app/services/output_review.py
@@ -84,15 +84,14 @@ async def review_read(client: httpx.AsyncClient, candidate: str) -> Verdict:
     try:
         result = await call_llm(
             client, messages,
-            # 300 tokens is comfortably above the 30-token JSON verdict
-            # the prompt asks for. An earlier 120-token cap was producing
-            # frequent finish_reason=length cutoffs that left the JSON
-            # half-written ('{"clean": false, "reason": "Text…'), which
-            # the parser then rejected as malformed — a false-negative
-            # in the verdict. The extra headroom costs ~$0.00015 per
-            # call (DeepSeek output rates) and removes that whole class
-            # of artefact.
-            max_tokens=300,
+            # 800 tokens is well above the ~30-token JSON verdict the
+            # prompt asks for. The reviewer model (DeepSeek-V4-flash)
+            # occasionally emits its own reasoning before the JSON
+            # even though response_format is set; smaller caps
+            # (120, 300) produced finish_reason=length cutoffs that
+            # left the JSON half-written and broke the parser. 800
+            # has removed the artefact so far, at ~$0.00022 per call.
+            max_tokens=800,
             response_format={"type": "json_object"},
         )
     except Exception as e: