From 8e7ea673ce3b557acaae3be2dca46e0306763957 Mon Sep 17 00:00:00 2001 From: Giorgio Gilestro Date: Fri, 29 May 2026 16:04:40 +0200 Subject: [PATCH] =?UTF-8?q?analyze:=20bump=20max=5Ftokens=202000=20?= =?UTF-8?q?=E2=86=92=204000=20for=20portfolio=20analysis?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Logs (analyze.lang_resolved → portfolio_analysis.reviewer_rejected chain on 2026-05-29) showed the lang directive was working — the model was producing Italian — but the reviewer was rejecting every response as truncated mid-word ("supera i mass", "INRG +8"). The analyze endpoint then returns 502 and the frontend keeps showing whatever stale English row was last cached in localStorage, so from the user's POV the analysis "is still in English". Same shape as the strategic-log translation cap we fixed earlier: the prompt targets ~350 English words, IT runs ~25-35% longer in tokens, and DeepSeek-V4-flash bills internal reasoning against the same budget. At 2000 we ran out of room mid-sentence. 4000 is well above the longest realistic Italian output; cost is bounded by tokens actually emitted, not the cap. Co-Authored-By: Claude Opus 4.7 --- app/services/portfolio_analysis.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/app/services/portfolio_analysis.py b/app/services/portfolio_analysis.py index dfd3cb1..9de6383 100644 --- a/app/services/portfolio_analysis.py +++ b/app/services/portfolio_analysis.py @@ -344,7 +344,17 @@ async def analyse( {"role": "system", "content": system}, {"role": "user", "content": user}, ], - max_tokens=2000, + # 4000 not 2000. Italian / Spanish / French / German + # output runs ~25-35% longer in tokens than English; on + # top of that DeepSeek-V4-flash bills its internal + # reasoning against the same budget. At 2000 we + # repeatedly hit finish_reason=length mid-sentence, + # which the reviewer agent then correctly flags as + # truncated and rejects — the user ends up looking at + # whatever stale row was last cached. 4000 leaves + # ample headroom; we only pay for tokens actually + # emitted, not the cap itself. + max_tokens=4000, ) status = "ok" error_msg = None