From 0550063316d14a5811cf9a9d30837b90f318adaf Mon Sep 17 00:00:00 2001
From: Giorgio Gilestro
Date: Fri, 29 May 2026 13:15:42 +0200
Subject: [PATCH] =?UTF-8?q?ai:=20bump=20reviewer=20max=5Ftokens=20120=20?=
 =?UTF-8?q?=E2=86=92=20300?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A live sanity-check on 50 recent IndicatorSummary rows found 6 of 10
reviewer rejections were the reviewer hitting its own max_tokens cap
mid-verdict ('{"clean": false, "reason": "Truncated sent…'). The parser
then dropped the candidate as malformed JSON, producing a false-negative
verdict that would have purged legitimately clean rows.

300 tokens is well above the ~30-token verdict the prompt asks for; the
extra headroom removes the artefact at ~$0.00015 per call.

Co-Authored-By: Claude Opus 4.7
---
 app/services/output_review.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/app/services/output_review.py b/app/services/output_review.py
index 3af2a7a..cdf545d 100644
--- a/app/services/output_review.py
+++ b/app/services/output_review.py
@@ -84,7 +84,15 @@ async def review_read(client: httpx.AsyncClient, candidate: str) -> Verdict:
     try:
         result = await call_llm(
             client, messages,
-            max_tokens=120,
+            # 300 tokens is comfortably above the 30-token JSON verdict
+            # the prompt asks for. An earlier 120-token cap was producing
+            # frequent finish_reason=length cutoffs that left the JSON
+            # half-written ('{"clean": false, "reason": "Text…'), which
+            # the parser then rejected as malformed — a false-negative
+            # in the verdict. The extra headroom costs ~$0.00015 per
+            # call (DeepSeek output rates) and removes that whole class
+            # of artefact.
+            max_tokens=300,
             response_format={"type": "json_object"},
         )
     except Exception as e: