From 0550063316d14a5811cf9a9d30837b90f318adaf Mon Sep 17 00:00:00 2001
From: Giorgio Gilestro <giorgio@gilest.ro>
Date: Fri, 29 May 2026 13:15:42 +0200
Subject: [PATCH] =?UTF-8?q?ai:=20bump=20reviewer=20max=5Ftokens=20120=20?=
 =?UTF-8?q?=E2=86=92=20300?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A live sanity-check on 50 recent IndicatorSummary rows found 6 of 10
reviewer rejections were the reviewer hitting its own max_tokens cap
mid-verdict ('{"clean": false, "reason": "Truncated sent…'). The
parser then rejected the truncated verdict as malformed JSON and the
candidate was dropped, a false-negative verdict that would have
purged legitimately clean rows.

300 tokens is well above the ~30-token verdict the prompt asks for;
the extra headroom removes the artefact at ~$0.00015 per call.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 app/services/output_review.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/app/services/output_review.py b/app/services/output_review.py
index 3af2a7a..cdf545d 100644
--- a/app/services/output_review.py
+++ b/app/services/output_review.py
@@ -84,7 +84,15 @@ async def review_read(client: httpx.AsyncClient, candidate: str) -> Verdict:
     try:
         result = await call_llm(
             client, messages,
-            max_tokens=120,
+            # The prompt asks for a ~30-token JSON verdict, but replies
+            # can run longer: an earlier 120-token cap was producing
+            # frequent finish_reason=length cutoffs that left the JSON
+            # half-written ('{"clean": false, "reason": "Text…'), which
+            # the parser then rejected as malformed, yielding a
+            # false-negative verdict. The extra headroom of a 300-token
+            # cap costs ~$0.00015 per call (DeepSeek output rates) and
+            # removes that whole class of artefact.
+            max_tokens=300,
             response_format={"type": "json_object"},
         )
     except Exception as e: