From 8b9d3c9c3e5e42a45584fe05cd842113c3bbc8c6 Mon Sep 17 00:00:00 2001 From: Giorgio Gilestro Date: Fri, 29 May 2026 13:16:57 +0200 Subject: [PATCH] =?UTF-8?q?ai:=20bump=20reviewer=20max=5Ftokens=20300=20?= =?UTF-8?q?=E2=86=92=20800?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Live re-check on 50 recent IndicatorSummary rows after the previous 120 → 300 bump still produced 4 'reviewer returned non-JSON' verdicts out of 12 rejections. DeepSeek-V4-flash sometimes prefixes its JSON output with a short stretch of thinking even though response_format is enforced; that preamble eats into the token budget, so the JSON itself gets cut off when the response hits the 300-token cap. 800 tokens is comfortably above any realistic verdict + preamble, at ~$0.00022/call (DeepSeek output rates). Negligible cost given the hourly call volume. Co-Authored-By: Claude Opus 4.7 --- app/services/output_review.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/app/services/output_review.py b/app/services/output_review.py index cdf545d..f228a74 100644 --- a/app/services/output_review.py +++ b/app/services/output_review.py @@ -84,15 +84,14 @@ async def review_read(client: httpx.AsyncClient, candidate: str) -> Verdict: try: result = await call_llm( client, messages, - # 300 tokens is comfortably above the 30-token JSON verdict - # the prompt asks for. An earlier 120-token cap was producing - # frequent finish_reason=length cutoffs that left the JSON - # half-written ('{"clean": false, "reason": "Text…'), which - # the parser then rejected as malformed — a false-negative - # in the verdict. The extra headroom costs ~$0.00015 per - # call (DeepSeek output rates) and removes that whole class - # of artefact. - max_tokens=300, + # 800 tokens is well above the ~30-token JSON verdict the + # prompt asks for. 
The reviewer model (DeepSeek-V4-flash) + # occasionally pads with its own thinking before the JSON + # even though response_format is enforced; smaller caps + # (120, 300) produced finish_reason=length cutoffs that + # left the JSON half-written and broke the parser. 800 + # removes the artefact entirely at ~$0.00022 per call. + max_tokens=800, response_format={"type": "json_object"}, ) except Exception as e: