review: per-surface rider, loosen for portfolio commentary

Reviewer was rejecting legitimate IT portfolio analyses, citing descriptive risk language as actionable advice: reason: "Allocation guidance throughout: 'concentrazione gestibile', 'non eliminabile', 'bassa esposizione', 'va monitorato'. Treats portfolio construction as actionable." These phrases describe portfolio state (manageable concentration, non-eliminable risk, low exposure, warrants monitoring) without directing the user to take action. They are exactly the kind of prose a portfolio commentary surface is supposed to produce. The reviewer's generic "no financial advice" rule is too broad here. Add a `surface` parameter to review_read() with a per-surface rider mechanism (_SURFACE_RIDERS). The "portfolio" rider: - Lists DESCRIPTIVE phrasings that are EXPLICITLY permitted: attribute naming ("high concentration", "currency exposure"), thesis invalidation conditions, impersonal observations about a position's sensitivity. - Tightens the reject list to EXPLICIT calls to action: imperative verbs aimed at the reader, "you should", "consider X-ing", specific allocation prescriptions, price-target predictions. portfolio_analysis.analyse() now passes surface="portfolio". All other reviewer call sites (indicator summary, log, chat, digest) default to surface=None and keep the generic rules. tests/conftest.py's autouse review_read stub picks up **_kw so adding new keyword arguments to review_read doesn't keep breaking the locale-integration tests. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-29 16:44:27 +02:00 · 2026-05-29 16:44:27 +02:00 · 0060166d32
commit 0060166d32
parent d47b310898
3 changed files with 57 additions and 5 deletions
--- a/app/services/output_review.py
+++ b/app/services/output_review.py
@ -101,6 +101,38 @@ No preamble, no markdown fences, no other fields.
 """


+# Surface-specific rider appended to the system prompt when the caller
+# passes a known `surface` to review_read(). Lets us relax or tighten
+# rules per editorial context without rewriting the whole prompt.
+_SURFACE_RIDERS = {
+    "portfolio": """\
+
+# Surface: portfolio commentary
+This text describes a real investor's holdings. DESCRIPTIVE risk
+language is the whole point of this surface and must NOT be flagged
+as financial advice. The following ARE fine:
+- Naming portfolio attributes: "high concentration", "single-name
+  exposure", "currency risk is unhedged", "FX exposure", "elevated
+  risk", "stretched valuations", "concentration is manageable", "low
+  diversification".
+- Stating what would invalidate the posture: "this view fails if
+  rates retrace", "the thesis depends on X holding".
+- Impersonal observation about a position's behaviour or sensitivity:
+  "the position warrants monitoring", "carries vulnerability to a
+  policy shock", "is sensitive to rate moves".
+
+ONLY flag EXPLICIT calls to action where a verb or directive is
+aimed at the reader:
+- Imperative verbs in the second person: "buy X", "sell Y",
+  "trim Z", "hedge", "rotate into".
+- "You should", "investors should", "consider X-ing", "we recommend".
+- Specific allocation prescriptions: "go 20% bonds", "overweight
+  tech", "underweight defensives".
+- Price-target predictions: "will reach $X by year-end".
+""",
+}
+
+
@dataclass(frozen=True)
 class Verdict:
    clean: bool
@ -108,18 +140,32 @@ class Verdict:
    cost_usd: float | None  # cost of the review call itself, for the ledger


-async def review_read(client: httpx.AsyncClient, candidate: str) -> Verdict:
+async def review_read(
+    client: httpx.AsyncClient,
+    candidate: str,
+    surface: str | None = None,
+) -> Verdict:
    """Ask the LLM whether `candidate` is a publishable read.

    Returns Verdict(clean, reason, cost). Any error — provider failure,
    JSON parse failure, missing field, wrong type — yields a CONSERVATIVE
    verdict (clean=False) so the caller drops the candidate. The
-    previously cached good summary stays visible on the dashboard."""
+    previously cached good summary stays visible on the dashboard.
+
+    `surface` selects a surface-specific rider that's appended to the
+    base system prompt — see _SURFACE_RIDERS. Currently only the
+    "portfolio" surface uses one (descriptive risk language is the
+    whole point there and shouldn't be flagged as advice). Unknown
+    or None surfaces fall back to the generic rules."""
    if not candidate or not candidate.strip():
        return Verdict(clean=False, reason="empty candidate", cost_usd=0.0)

+    system_prompt = _SYSTEM_PROMPT
+    if surface and surface in _SURFACE_RIDERS:
+        system_prompt = system_prompt + _SURFACE_RIDERS[surface]
+
    messages = [
-        {"role": "system", "content": _SYSTEM_PROMPT},
+        {"role": "system", "content": system_prompt},
        # Sent as a fenced user turn so the model can't confuse the
        # candidate with instructions, even if the candidate happens to
        # contain prompt-like prose.
--- a/app/services/portfolio_analysis.py
+++ b/app/services/portfolio_analysis.py
@ -365,8 +365,14 @@ async def analyse(
        # buy/sell or allocation language is a regulatory hazard. Drop
        # the response on a reject and surface a retry-able error to the
        # caller; no analysis is ever persisted server-side anyway.
+        # surface="portfolio" applies a rider that loosens the generic
+        # "no advice" rule to permit descriptive risk language
+        # ("concentration is high", "currency exposure is unhedged",
+        # "the position warrants monitoring") which is the actual
+        # purpose of this surface, while keeping explicit
+        # buy/sell/allocation directives forbidden.
        if llm is not None:
-            verdict = await review_read(client, llm.content)
+            verdict = await review_read(client, llm.content, surface="portfolio")
            review_cost = verdict.cost_usd or 0.0
            if not verdict.clean:
                status = "leaked"
--- a/tests/conftest.py
+++ b/tests/conftest.py
@ -36,7 +36,7 @@ def stub_reviewer(monkeypatch):
    """
    from app.services.output_review import Verdict

-    async def _clean(_client, _candidate):
+    async def _clean(_client, _candidate, **_kw):
        return Verdict(clean=True, reason="stubbed-by-conftest", cost_usd=0.0)

    for mod_path in (