diff --git a/app/services/output_review.py b/app/services/output_review.py index 3af2a7a..cdf545d 100644 --- a/app/services/output_review.py +++ b/app/services/output_review.py @@ -84,7 +84,15 @@ async def review_read(client: httpx.AsyncClient, candidate: str) -> Verdict: try: result = await call_llm( client, messages, - max_tokens=120, + # 300 tokens is comfortably above the 30-token JSON verdict + # the prompt asks for. An earlier 120-token cap was producing + # frequent finish_reason=length cutoffs that left the JSON + # half-written ('{"clean": false, "reason": "Text…'), which + # the parser then rejected as malformed — a false negative + # in the verdict. The extra headroom costs ~$0.00015 per + # call (DeepSeek output rates) and removes that whole class + # of artefact. + max_tokens=300, response_format={"type": "json_object"}, ) except Exception as e: