From 0060166d324dbe4e4978b6fd65fbe0fff7c612e1 Mon Sep 17 00:00:00 2001 From: Giorgio Gilestro Date: Fri, 29 May 2026 16:44:27 +0200 Subject: [PATCH] review: per-surface rider, loosen for portfolio commentary Reviewer was rejecting legitimate IT portfolio analyses, citing descriptive risk language as actionable advice: reason: "Allocation guidance throughout: 'concentrazione gestibile', 'non eliminabile', 'bassa esposizione', 'va monitorato'. Treats portfolio construction as actionable." These phrases describe portfolio state (manageable concentration, non-eliminable risk, low exposure, warrants monitoring) without directing the user to take action. They are exactly the kind of prose a portfolio commentary surface is supposed to produce. The reviewer's generic "no financial advice" rule is too broad here. Add a `surface` parameter to review_read() with a per-surface rider mechanism (_SURFACE_RIDERS). The "portfolio" rider: - Lists DESCRIPTIVE phrasings that are EXPLICITLY permitted: attribute naming ("high concentration", "currency exposure"), thesis invalidation conditions, impersonal observations about a position's sensitivity. - Tightens the reject list to EXPLICIT calls to action: imperative verbs aimed at the reader, "you should", "consider X-ing", specific allocation prescriptions, price-target predictions. portfolio_analysis.analyse() now passes surface="portfolio". All other reviewer call sites (indicator summary, log, chat, digest) default to surface=None and keep the generic rules. tests/conftest.py's autouse review_read stub picks up **_kw so adding new keyword arguments to review_read doesn't keep breaking the locale-integration tests. Co-Authored-By: Claude Opus 4.7 --- app/services/output_review.py | 52 ++++++++++++++++++++++++++++-- app/services/portfolio_analysis.py | 8 ++++- tests/conftest.py | 2 +- 3 files changed, 57 insertions(+), 5 deletions(-) diff --git a/app/services/output_review.py b/app/services/output_review.py index 0fa6a9a..af96ffa 100644 --- a/app/services/output_review.py +++ b/app/services/output_review.py @@ -101,6 +101,38 @@ No preamble, no markdown fences, no other fields. """ +# Surface-specific rider appended to the system prompt when the caller +# passes a known `surface` to review_read(). Lets us relax or tighten +# rules per editorial context without rewriting the whole prompt. +_SURFACE_RIDERS = { + "portfolio": """\ + +# Surface: portfolio commentary +This text describes a real investor's holdings. DESCRIPTIVE risk +language is the whole point of this surface and must NOT be flagged +as financial advice. The following ARE fine: +- Naming portfolio attributes: "high concentration", "single-name + exposure", "currency risk is unhedged", "FX exposure", "elevated + risk", "stretched valuations", "concentration is manageable", "low + diversification". +- Stating what would invalidate the posture: "this view fails if + rates retrace", "the thesis depends on X holding". +- Impersonal observation about a position's behaviour or sensitivity: + "the position warrants monitoring", "carries vulnerability to a + policy shock", "is sensitive to rate moves". + +ONLY flag EXPLICIT calls to action where a verb or directive is +aimed at the reader: +- Imperative verbs in the second person: "buy X", "sell Y", + "trim Z", "hedge", "rotate into". +- "You should", "investors should", "consider X-ing", "we recommend". +- Specific allocation prescriptions: "go 20% bonds", "overweight + tech", "underweight defensives". +- Price-target predictions: "will reach $X by year-end". +""", +} + + @dataclass(frozen=True) class Verdict: clean: bool @@ -108,18 +140,32 @@ class Verdict: cost_usd: float | None # cost of the review call itself, for the ledger -async def review_read(client: httpx.AsyncClient, candidate: str) -> Verdict: +async def review_read( + client: httpx.AsyncClient, + candidate: str, + surface: str | None = None, +) -> Verdict: """Ask the LLM whether `candidate` is a publishable read. Returns Verdict(clean, reason, cost). Any error — provider failure, JSON parse failure, missing field, wrong type — yields a CONSERVATIVE verdict (clean=False) so the caller drops the candidate. The - previously cached good summary stays visible on the dashboard.""" + previously cached good summary stays visible on the dashboard. + + `surface` selects a surface-specific rider that's appended to the + base system prompt — see _SURFACE_RIDERS. Currently only the + "portfolio" surface uses one (descriptive risk language is the + whole point there and shouldn't be flagged as advice). Unknown + or None surfaces fall back to the generic rules.""" if not candidate or not candidate.strip(): return Verdict(clean=False, reason="empty candidate", cost_usd=0.0) + system_prompt = _SYSTEM_PROMPT + if surface and surface in _SURFACE_RIDERS: + system_prompt = system_prompt + _SURFACE_RIDERS[surface] + messages = [ - {"role": "system", "content": _SYSTEM_PROMPT}, + {"role": "system", "content": system_prompt}, # Sent as a fenced user turn so the model can't confuse the # candidate with instructions, even if the candidate happens to # contain prompt-like prose. diff --git a/app/services/portfolio_analysis.py b/app/services/portfolio_analysis.py index 9cb84c6..bd51f89 100644 --- a/app/services/portfolio_analysis.py +++ b/app/services/portfolio_analysis.py @@ -365,8 +365,14 @@ async def analyse( # buy/sell or allocation language is a regulatory hazard. Drop # the response on a reject and surface a retry-able error to the # caller; no analysis is ever persisted server-side anyway. + # surface="portfolio" applies a rider that loosens the generic + # "no advice" rule to permit descriptive risk language + # ("concentration is high", "currency exposure is unhedged", + # "the position warrants monitoring") which is the actual + # purpose of this surface, while keeping explicit + # buy/sell/allocation directives forbidden. if llm is not None: - verdict = await review_read(client, llm.content) + verdict = await review_read(client, llm.content, surface="portfolio") review_cost = verdict.cost_usd or 0.0 if not verdict.clean: status = "leaked" diff --git a/tests/conftest.py b/tests/conftest.py index e49e229..279775d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -36,7 +36,7 @@ def stub_reviewer(monkeypatch): """ from app.services.output_review import Verdict - async def _clean(_client, _candidate): + async def _clean(_client, _candidate, **_kw): return Verdict(clean=True, reason="stubbed-by-conftest", cost_usd=0.0) for mod_path in (