From 0060166d324dbe4e4978b6fd65fbe0fff7c612e1 Mon Sep 17 00:00:00 2001
From: Giorgio Gilestro <giorgio@gilest.ro>
Date: Fri, 29 May 2026 16:44:27 +0200
Subject: [PATCH] review: per-surface rider, loosen for portfolio commentary

Reviewer was rejecting legitimate IT portfolio analyses, citing
descriptive risk language as actionable advice:

  reason: "Allocation guidance throughout: 'concentrazione gestibile',
  'non eliminabile', 'bassa esposizione', 'va monitorato'. Treats
  portfolio construction as actionable."

These phrases describe portfolio state (manageable concentration,
non-eliminable risk, low exposure, warrants monitoring) without
directing the user to take action. They are exactly the kind of
prose a portfolio commentary surface is supposed to produce. The
reviewer's generic "no financial advice" rule is too broad here.

Add a `surface` parameter to review_read() with a per-surface rider
mechanism (_SURFACE_RIDERS). The "portfolio" rider:

- Lists DESCRIPTIVE phrasings that are EXPLICITLY permitted:
  attribute naming ("high concentration", "currency exposure"),
  thesis invalidation conditions, impersonal observations about a
  position's sensitivity.
- Tightens the reject list to EXPLICIT calls to action: imperative
  verbs aimed at the reader, "you should", "consider X-ing",
  specific allocation prescriptions, price-target predictions.

portfolio_analysis.analyse() now passes surface="portfolio". All
other reviewer call sites (indicator summary, log, chat, digest)
default to surface=None and keep the generic rules.

tests/conftest.py's autouse review_read stub picks up **_kw so
adding new keyword arguments to review_read doesn't keep breaking
the locale-integration tests.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 app/services/output_review.py      | 52 ++++++++++++++++++++++++++++--
 app/services/portfolio_analysis.py |  8 ++++-
 tests/conftest.py                  |  2 +-
 3 files changed, 57 insertions(+), 5 deletions(-)

diff --git a/app/services/output_review.py b/app/services/output_review.py
index 0fa6a9a..af96ffa 100644
--- a/app/services/output_review.py
+++ b/app/services/output_review.py
@@ -101,6 +101,38 @@ No preamble, no markdown fences, no other fields.
 """
 
 
+# Surface-specific rider appended to the system prompt when the caller
+# passes a known `surface` to review_read(). Lets us relax or tighten
+# rules per editorial context without rewriting the whole prompt.
+_SURFACE_RIDERS = {
+    "portfolio": """\
+
+# Surface: portfolio commentary
+This text describes a real investor's holdings. DESCRIPTIVE risk
+language is the whole point of this surface and must NOT be flagged
+as financial advice. The following ARE fine:
+- Naming portfolio attributes: "high concentration", "single-name
+  exposure", "currency risk is unhedged", "FX exposure", "elevated
+  risk", "stretched valuations", "concentration is manageable", "low
+  diversification".
+- Stating what would invalidate the posture: "this view fails if
+  rates retrace", "the thesis depends on X holding".
+- Impersonal observation about a position's behaviour or sensitivity:
+  "the position warrants monitoring", "carries vulnerability to a
+  policy shock", "is sensitive to rate moves".
+
+ONLY flag EXPLICIT calls to action where a verb or directive is
+aimed at the reader:
+- Imperative verbs in the second person: "buy X", "sell Y",
+  "trim Z", "hedge", "rotate into".
+- "You should", "investors should", "consider X-ing", "we recommend".
+- Specific allocation prescriptions: "go 20% bonds", "overweight
+  tech", "underweight defensives".
+- Price-target predictions: "will reach $X by year-end".
+""",
+}
+
+
 @dataclass(frozen=True)
 class Verdict:
     clean: bool
@@ -108,18 +140,32 @@ class Verdict:
     cost_usd: float | None  # cost of the review call itself, for the ledger
 
 
-async def review_read(client: httpx.AsyncClient, candidate: str) -> Verdict:
+async def review_read(
+    client: httpx.AsyncClient,
+    candidate: str,
+    surface: str | None = None,
+) -> Verdict:
     """Ask the LLM whether `candidate` is a publishable read.
 
     Returns Verdict(clean, reason, cost). Any error — provider failure,
     JSON parse failure, missing field, wrong type — yields a CONSERVATIVE
     verdict (clean=False) so the caller drops the candidate. The
-    previously cached good summary stays visible on the dashboard."""
+    previously cached good summary stays visible on the dashboard.
+
+    `surface` selects a surface-specific rider that's appended to the
+    base system prompt — see _SURFACE_RIDERS. Currently only the
+    "portfolio" surface uses one (descriptive risk language is the
+    whole point there and shouldn't be flagged as advice). Unknown
+    or None surfaces fall back to the generic rules."""
     if not candidate or not candidate.strip():
         return Verdict(clean=False, reason="empty candidate", cost_usd=0.0)
 
+    system_prompt = _SYSTEM_PROMPT
+    if surface and surface in _SURFACE_RIDERS:
+        system_prompt = system_prompt + _SURFACE_RIDERS[surface]
+
     messages = [
-        {"role": "system", "content": _SYSTEM_PROMPT},
+        {"role": "system", "content": system_prompt},
         # Sent as a fenced user turn so the model can't confuse the
         # candidate with instructions, even if the candidate happens to
         # contain prompt-like prose.
diff --git a/app/services/portfolio_analysis.py b/app/services/portfolio_analysis.py
index 9cb84c6..bd51f89 100644
--- a/app/services/portfolio_analysis.py
+++ b/app/services/portfolio_analysis.py
@@ -365,8 +365,14 @@ async def analyse(
         # buy/sell or allocation language is a regulatory hazard. Drop
         # the response on a reject and surface a retry-able error to the
         # caller; no analysis is ever persisted server-side anyway.
+        # surface="portfolio" applies a rider that loosens the generic
+        # "no advice" rule to permit descriptive risk language
+        # ("concentration is high", "currency exposure is unhedged",
+        # "the position warrants monitoring") which is the actual
+        # purpose of this surface, while keeping explicit
+        # buy/sell/allocation directives forbidden.
         if llm is not None:
-            verdict = await review_read(client, llm.content)
+            verdict = await review_read(client, llm.content, surface="portfolio")
             review_cost = verdict.cost_usd or 0.0
             if not verdict.clean:
                 status = "leaked"
diff --git a/tests/conftest.py b/tests/conftest.py
index e49e229..279775d 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -36,7 +36,7 @@ def stub_reviewer(monkeypatch):
     """
     from app.services.output_review import Verdict
 
-    async def _clean(_client, _candidate):
+    async def _clean(_client, _candidate, **_kw):
         return Verdict(clean=True, reason="stubbed-by-conftest", cost_usd=0.0)
 
     for mod_path in (