From 8b9d3c9c3e5e42a45584fe05cd842113c3bbc8c6 Mon Sep 17 00:00:00 2001
From: Giorgio Gilestro <giorgio@gilest.ro>
Date: Fri, 29 May 2026 13:16:57 +0200
Subject: [PATCH] =?UTF-8?q?ai:=20bump=20reviewer=20max=5Ftokens=20300=20?=
 =?UTF-8?q?=E2=86=92=20800?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Live re-check on 50 recent IndicatorSummary rows after the previous
120 → 300 bump still produced 4 'reviewer returned non-JSON' verdicts
out of 12 rejections. DeepSeek-V4-flash sometimes prefixes its JSON
output with a short stretch of thinking even though response_format
is enforced; that preamble eats into the token budget, so the JSON
itself is cut off when the 300-token cap is reached.

800 tokens is comfortably above any realistic verdict + preamble and
costs ~$0.00022/call (DeepSeek output rates), which is negligible given
the hourly call volume.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 app/services/output_review.py | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/app/services/output_review.py b/app/services/output_review.py
index cdf545d..f228a74 100644
--- a/app/services/output_review.py
+++ b/app/services/output_review.py
@@ -84,15 +84,14 @@ async def review_read(client: httpx.AsyncClient, candidate: str) -> Verdict:
     try:
         result = await call_llm(
             client, messages,
-            # 300 tokens is comfortably above the 30-token JSON verdict
-            # the prompt asks for. An earlier 120-token cap was producing
-            # frequent finish_reason=length cutoffs that left the JSON
-            # half-written ('{"clean": false, "reason": "Text…'), which
-            # the parser then rejected as malformed — a false-negative
-            # in the verdict. The extra headroom costs ~$0.00015 per
-            # call (DeepSeek output rates) and removes that whole class
-            # of artefact.
-            max_tokens=300,
+            # 800 tokens is well above the ~30-token JSON verdict the
+            # prompt asks for. The reviewer model (DeepSeek-V4-flash)
+            # occasionally pads with its own thinking before the JSON
+            # even though response_format is enforced; smaller caps
+            # (120, 300) produced finish_reason=length cutoffs that
+            # left the JSON half-written and broke the parser. 800
+            # removes the artefact entirely at ~$0.00022 per call.
+            max_tokens=800,
             response_format={"type": "json_object"},
         )
     except Exception as e: