review: per-surface rider, loosen for portfolio commentary
Reviewer was rejecting legitimate IT portfolio analyses, citing
descriptive risk language as actionable advice:
reason: "Allocation guidance throughout: 'concentrazione gestibile',
'non eliminabile', 'bassa esposizione', 'va monitorato'. Treats
portfolio construction as actionable."
These phrases describe portfolio state (manageable concentration,
non-eliminable risk, low exposure, warrants monitoring) without
directing the user to take action. They are exactly the kind of
prose a portfolio commentary surface is supposed to produce. The
reviewer's generic "no financial advice" rule is too broad here.
Add a `surface` parameter to review_read() with a per-surface rider
mechanism (_SURFACE_RIDERS). The "portfolio" rider:
- Lists DESCRIPTIVE phrasings that are EXPLICITLY permitted:
attribute naming ("high concentration", "currency exposure"),
thesis invalidation conditions, impersonal observations about a
position's sensitivity.
- Tightens the reject list to EXPLICIT calls to action: imperative
verbs aimed at the reader, "you should", "consider X-ing",
specific allocation prescriptions, price-target predictions.
portfolio_analysis.analyse() now passes surface="portfolio". All
other reviewer call sites (indicator summary, log, chat, digest)
default to surface=None and keep the generic rules.
tests/conftest.py's autouse review_read stub picks up **_kw so
adding new keyword arguments to review_read doesn't keep breaking
the locale-integration tests.
Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
d47b310898
commit
0060166d32
3 changed files with 57 additions and 5 deletions
|
|
@ -101,6 +101,38 @@ No preamble, no markdown fences, no other fields.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
# Surface-specific rider appended to the system prompt when the caller
|
||||||
|
# passes a known `surface` to review_read(). Lets us relax or tighten
|
||||||
|
# rules per editorial context without rewriting the whole prompt.
|
||||||
|
_SURFACE_RIDERS = {
|
||||||
|
"portfolio": """\
|
||||||
|
|
||||||
|
# Surface: portfolio commentary
|
||||||
|
This text describes a real investor's holdings. DESCRIPTIVE risk
|
||||||
|
language is the whole point of this surface and must NOT be flagged
|
||||||
|
as financial advice. The following ARE fine:
|
||||||
|
- Naming portfolio attributes: "high concentration", "single-name
|
||||||
|
exposure", "currency risk is unhedged", "FX exposure", "elevated
|
||||||
|
risk", "stretched valuations", "concentration is manageable", "low
|
||||||
|
diversification".
|
||||||
|
- Stating what would invalidate the posture: "this view fails if
|
||||||
|
rates retrace", "the thesis depends on X holding".
|
||||||
|
- Impersonal observation about a position's behaviour or sensitivity:
|
||||||
|
"the position warrants monitoring", "carries vulnerability to a
|
||||||
|
policy shock", "is sensitive to rate moves".
|
||||||
|
|
||||||
|
ONLY flag EXPLICIT calls to action where a verb or directive is
|
||||||
|
aimed at the reader:
|
||||||
|
- Imperative verbs in the second person: "buy X", "sell Y",
|
||||||
|
"trim Z", "hedge", "rotate into".
|
||||||
|
- "You should", "investors should", "consider X-ing", "we recommend".
|
||||||
|
- Specific allocation prescriptions: "go 20% bonds", "overweight
|
||||||
|
tech", "underweight defensives".
|
||||||
|
- Price-target predictions: "will reach $X by year-end".
|
||||||
|
""",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
class Verdict:
|
class Verdict:
|
||||||
clean: bool
|
clean: bool
|
||||||
|
|
@ -108,18 +140,32 @@ class Verdict:
|
||||||
cost_usd: float | None # cost of the review call itself, for the ledger
|
cost_usd: float | None # cost of the review call itself, for the ledger
|
||||||
|
|
||||||
|
|
||||||
async def review_read(client: httpx.AsyncClient, candidate: str) -> Verdict:
|
async def review_read(
|
||||||
|
client: httpx.AsyncClient,
|
||||||
|
candidate: str,
|
||||||
|
surface: str | None = None,
|
||||||
|
) -> Verdict:
|
||||||
"""Ask the LLM whether `candidate` is a publishable read.
|
"""Ask the LLM whether `candidate` is a publishable read.
|
||||||
|
|
||||||
Returns Verdict(clean, reason, cost). Any error — provider failure,
|
Returns Verdict(clean, reason, cost). Any error — provider failure,
|
||||||
JSON parse failure, missing field, wrong type — yields a CONSERVATIVE
|
JSON parse failure, missing field, wrong type — yields a CONSERVATIVE
|
||||||
verdict (clean=False) so the caller drops the candidate. The
|
verdict (clean=False) so the caller drops the candidate. The
|
||||||
previously cached good summary stays visible on the dashboard."""
|
previously cached good summary stays visible on the dashboard.
|
||||||
|
|
||||||
|
`surface` selects a surface-specific rider that's appended to the
|
||||||
|
base system prompt — see _SURFACE_RIDERS. Currently only the
|
||||||
|
"portfolio" surface uses one (descriptive risk language is the
|
||||||
|
whole point there and shouldn't be flagged as advice). Unknown
|
||||||
|
or None surfaces fall back to the generic rules."""
|
||||||
if not candidate or not candidate.strip():
|
if not candidate or not candidate.strip():
|
||||||
return Verdict(clean=False, reason="empty candidate", cost_usd=0.0)
|
return Verdict(clean=False, reason="empty candidate", cost_usd=0.0)
|
||||||
|
|
||||||
|
system_prompt = _SYSTEM_PROMPT
|
||||||
|
if surface and surface in _SURFACE_RIDERS:
|
||||||
|
system_prompt = system_prompt + _SURFACE_RIDERS[surface]
|
||||||
|
|
||||||
messages = [
|
messages = [
|
||||||
{"role": "system", "content": _SYSTEM_PROMPT},
|
{"role": "system", "content": system_prompt},
|
||||||
# Sent as a fenced user turn so the model can't confuse the
|
# Sent as a fenced user turn so the model can't confuse the
|
||||||
# candidate with instructions, even if the candidate happens to
|
# candidate with instructions, even if the candidate happens to
|
||||||
# contain prompt-like prose.
|
# contain prompt-like prose.
|
||||||
|
|
|
||||||
|
|
@ -365,8 +365,14 @@ async def analyse(
|
||||||
# buy/sell or allocation language is a regulatory hazard. Drop
|
# buy/sell or allocation language is a regulatory hazard. Drop
|
||||||
# the response on a reject and surface a retry-able error to the
|
# the response on a reject and surface a retry-able error to the
|
||||||
# caller; no analysis is ever persisted server-side anyway.
|
# caller; no analysis is ever persisted server-side anyway.
|
||||||
|
# surface="portfolio" applies a rider that loosens the generic
|
||||||
|
# "no advice" rule to permit descriptive risk language
|
||||||
|
# ("concentration is high", "currency exposure is unhedged",
|
||||||
|
# "the position warrants monitoring") which is the actual
|
||||||
|
# purpose of this surface, while keeping explicit
|
||||||
|
# buy/sell/allocation directives forbidden.
|
||||||
if llm is not None:
|
if llm is not None:
|
||||||
verdict = await review_read(client, llm.content)
|
verdict = await review_read(client, llm.content, surface="portfolio")
|
||||||
review_cost = verdict.cost_usd or 0.0
|
review_cost = verdict.cost_usd or 0.0
|
||||||
if not verdict.clean:
|
if not verdict.clean:
|
||||||
status = "leaked"
|
status = "leaked"
|
||||||
|
|
|
||||||
|
|
@ -36,7 +36,7 @@ def stub_reviewer(monkeypatch):
|
||||||
"""
|
"""
|
||||||
from app.services.output_review import Verdict
|
from app.services.output_review import Verdict
|
||||||
|
|
||||||
async def _clean(_client, _candidate):
|
async def _clean(_client, _candidate, **_kw):
|
||||||
return Verdict(clean=True, reason="stubbed-by-conftest", cost_usd=0.0)
|
return Verdict(clean=True, reason="stubbed-by-conftest", cost_usd=0.0)
|
||||||
|
|
||||||
for mod_path in (
|
for mod_path in (
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue