Server no longer holds portfolios. Holdings live in the browser (localStorage); the server publishes an anonymous ticker_universe and a gzipped /api/universe payload identical for every authenticated user, so access patterns can't betray which tickers a user holds. AI commentary is generated ephemerally from the browser-supplied pie and the cost ledger row records no positions. Migrations 0009-0011 added the universe table and dropped positions / portfolio_snapshots / portfolios. Authentication is now e-mail OTP only. Migration 0010 dropped password_hash and email_verified (every active session is by construction proof of email control). The /signup endpoint is gone; signup and login share a single email-entry page. Email rendering is HTML+plain-text multipart with a shared brand palette (app/branding.py) asserted in sync with the CSS by a drift-detection test. LLM provider defaults to DeepSeek-direct (cheaper, api.deepseek.com) with OpenRouter as automatic fallback if DeepSeek fails. ai_log_job and indicator_summary_job now iterate the two tones (NOVICE, INTERMEDIATE) per cycle so the dashboard's tone toggle is instant; PROMPT_VERSION bumped to 6 with an educational anti-TA / anti-gambling stance baked into _CORE. NOVICE mode renders a curated glossary inline (CBOE VIX, yield curve, HY OAS, etc.) with JS-positioned tooltips that survive viewport edges and sticky bars. Model name and tokens hidden from the user UI; still recorded in StrategicLog.model and AICall for admin. Layout adds a sticky top nav, a sticky bottom markets bar (one chip per exchange with status LED + headline index + 1d change), and Phase H feedback reporting is queued in tasks/todo.md. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
338 lines
13 KiB
Python
338 lines
13 KiB
Python
"""Hourly per-group indicator summaries — a short AI read at the top of each
|
||
Indicators tab. Costs ~$0.0003 per call on DeepSeek V4 Flash, so 10+ groups
|
||
hourly stays comfortably under the monthly cap."""
|
||
from __future__ import annotations
|
||
|
||
import asyncio
|
||
import re
|
||
from collections import defaultdict
|
||
|
||
import httpx
|
||
from sqlalchemy import desc, func, select
|
||
|
||
from app.config import get_settings, load_groups
|
||
from app.db import utcnow
|
||
from app.jobs._helpers import job_lifecycle, log
|
||
from app.models import AICall, IndicatorSummary, JobRun, Quote
|
||
from app.services.cadence import DEFAULT_POLICY
|
||
from app.services.openrouter import (
|
||
PROMPT_VERSION,
|
||
active_model,
|
||
build_aggregate_summary_system_prompt,
|
||
build_aggregate_summary_user_prompt,
|
||
build_summary_system_prompt,
|
||
build_summary_user_prompt,
|
||
call_llm,
|
||
llm_configured,
|
||
month_start,
|
||
)
|
||
|
||
|
||
AGGREGATE_GROUP_NAME = "__all__"
|
||
|
||
|
||
# Strip known meta-commentary openers the model sometimes leaks despite the
|
||
# prompt's hard constraints. Each pattern matches one leading sentence.
|
||
_LEAK_PATTERNS = [
|
||
re.compile(p, re.IGNORECASE | re.DOTALL)
|
||
for p in (
|
||
# First-person meta — "I need to / I'll / I have to / I'm going to ..."
|
||
r"^i\s+(?:need|have|must|should|am going|'ll|will|shall|can|am)[^.]*\.\s*",
|
||
# "We need / we're / we are asked / we will ..."
|
||
r"^we\s+(?:need|are|'re|will|shall|can|should|must|have)[^.]*\.\s*",
|
||
r"^let\s+(?:me|us|'?s)[^.]*\.\s*",
|
||
r"^here['’]s[^.]*\.\s*",
|
||
r"^sure[,!]?\s[^.]*\.\s*",
|
||
r"^looking at[^.]*\.\s*",
|
||
r"^based on[^.]*\.\s*",
|
||
r"^to (?:address|answer|write|summarise|summarize)[^.]*\.\s*",
|
||
r"^first[,]?\s[^.]*\.\s*",
|
||
r"^the (?:user|data shows|reader|task|request|reader sees|instructions?)[^.]*\.\s*",
|
||
r"^summary[:.]\s*",
|
||
r"^key\s*[:\-—]\s*",
|
||
r"^must\s+(?:be|cite|explain|avoid|give|stay|provide)[^.]*\.\s*",
|
||
r"^should\s+(?:be|give|cite|explain|avoid|provide)[^.]*\.\s*",
|
||
r"^avoid[^.]*\.\s*",
|
||
r"^cite\s+at\s+most[^.]*\.\s*",
|
||
r"^be\s+(?:speculative|specific|concise|brief)[^.]*\.\s*",
|
||
r"^stay\s+on[^.]*\.\s*",
|
||
r"^okay[,]?\s+",
|
||
r"^alright[,]?\s+",
|
||
r"^thinking[^.]*\.\s*",
|
||
# Prompt-leak prefixes — the model echoes example framing or rule
|
||
# headers from the system prompt.
|
||
r"^(?:good|bad|positive|negative)\s+example\s*[:\-—]\s*",
|
||
r"^example\s+(?:good|bad)\s*[:\-—]\s*",
|
||
r"^example\s*[:\-—]\s*",
|
||
r"^reference\s+style\s*[:\-—]\s*",
|
||
# Prompt label echoes (markdown-style or plain-text)
|
||
r"^(?:hard\s+)?constraints?\s*[:\-—][^.\n]*[.\n]\s*",
|
||
r"^key\s+observations?\s*[:\-—]\s*",
|
||
r"^observations?\s*[:\-—]\s*",
|
||
r"^focus\s+on[^.]*\.\s*",
|
||
r"^output\s+the\s+read[^.]*\.\s*",
|
||
r"^plain\s+prose[^.]*\.\s*",
|
||
r"^the\s+indicators?[^.]*\.\s*", # "The indicators include..." / "The indicators are..."
|
||
r"^indicators?\s*[:\-—]\s*",
|
||
r"^data\s*[:\-—]\s*",
|
||
r"^analysis\s*[:\-—]\s*",
|
||
r"^interpretation\s*[:\-—]\s*",
|
||
r"^read\s*[:\-—]\s*",
|
||
r"^note\s*[:\-—]\s*",
|
||
# Sometimes the response gets wrapped in literal quotes
|
||
r"^[\"“'`]+",
|
||
)
|
||
]
|
||
|
||
|
||
_TRAILING_QUOTE = re.compile(r"[\"”'`]+\s*$")
|
||
|
||
# Tell-tale phrases that mean the model regurgitated the prompt as its
|
||
# "answer" — we'd rather show nothing than show this.
|
||
_LEAKAGE_FLAGS = (
|
||
"≤60 words", "60 words", "must be under", "must cite", "must explain",
|
||
"no meta-commentary", "no buy/sell", "horizon. ", "1-day moves",
|
||
"the instructions are", "instructions:", "constraints:", "hard constraints",
|
||
"good example", "bad example", "reference style",
|
||
)
|
||
|
||
|
||
def looks_like_leakage(text: str) -> bool:
|
||
"""Heuristic: after cleaning, if these phrases still appear, the output
|
||
is contaminated prompt-regurgitation and shouldn't be shown."""
|
||
low = text.lower()
|
||
return any(flag in low for flag in _LEAKAGE_FLAGS)
|
||
|
||
|
||
def clean_summary(text: str) -> str:
|
||
"""Strip leading meta-commentary. If cleaning removes nearly everything
|
||
(suggesting the model emitted reasoning then ran out of tokens), fall
|
||
back to the last non-empty paragraph of the raw output — that's usually
|
||
where the actual answer ended up."""
|
||
raw = text.strip()
|
||
out = raw
|
||
# Up to 6 passes: handles compound leakage like
|
||
# "Constraints: <...>. The indicators are: <...>. <actual answer>"
|
||
for _ in range(6):
|
||
before = out
|
||
for pat in _LEAK_PATTERNS:
|
||
out = pat.sub("", out, count=1).lstrip()
|
||
if out == before:
|
||
break
|
||
if len(out) < 60 and len(raw) > 120:
|
||
# Cleaning ate too much; take the last non-empty paragraph of raw.
|
||
paragraphs = [p.strip() for p in re.split(r"\n\s*\n", raw) if p.strip()]
|
||
if paragraphs:
|
||
out = paragraphs[-1]
|
||
# Re-strip leaders from the recovered paragraph too.
|
||
for _ in range(2):
|
||
before = out
|
||
for pat in _LEAK_PATTERNS:
|
||
out = pat.sub("", out, count=1).lstrip()
|
||
if out == before:
|
||
break
|
||
# Trim any orphan closing quote/backtick from the wrap-strip above.
|
||
out = _TRAILING_QUOTE.sub("", out).rstrip()
|
||
return out
|
||
|
||
|
||
async def _latest_quotes_by_group(session) -> dict[str, list[dict]]:
|
||
"""Latest non-null quote per (group, symbol). Drops error rows."""
|
||
sub = (
|
||
select(Quote.group_name, Quote.symbol,
|
||
func.max(Quote.fetched_at).label("mx"))
|
||
.group_by(Quote.group_name, Quote.symbol)
|
||
.subquery()
|
||
)
|
||
rows = (await session.execute(
|
||
select(Quote).join(
|
||
sub,
|
||
(Quote.group_name == sub.c.group_name)
|
||
& (Quote.symbol == sub.c.symbol)
|
||
& (Quote.fetched_at == sub.c.mx),
|
||
).where(Quote.price.is_not(None))
|
||
.order_by(Quote.group_name, Quote.symbol)
|
||
)).scalars().all()
|
||
by_group: dict[str, list[dict]] = defaultdict(list)
|
||
for q in rows:
|
||
by_group[q.group_name].append({
|
||
"symbol": q.symbol, "label": q.label,
|
||
"price": q.price, "currency": q.currency,
|
||
"as_of": q.as_of, "changes": q.changes,
|
||
})
|
||
return by_group
|
||
|
||
|
||
async def _month_spend(session) -> float:
|
||
total = (await session.execute(
|
||
select(func.coalesce(func.sum(AICall.cost_usd), 0.0))
|
||
.where(AICall.called_at >= month_start())
|
||
)).scalar()
|
||
return float(total or 0.0)
|
||
|
||
|
||
async def _generate_one(
|
||
session, client: httpx.AsyncClient, group: str, quotes: list[dict],
|
||
system_prompt: str, model: str, tone: str, analysis: str,
|
||
) -> bool:
|
||
"""Generate + persist one group's summary. Returns True on success.
|
||
`model` is retained for ledger labelling but call_llm now picks the
|
||
active-provider model itself."""
|
||
user_prompt = build_summary_user_prompt(group, quotes)
|
||
try:
|
||
result = await call_llm(
|
||
client,
|
||
[{"role": "system", "content": system_prompt},
|
||
{"role": "user", "content": user_prompt}],
|
||
max_tokens=800, # DeepSeek sometimes spends 300+ on internal reasoning
|
||
)
|
||
except Exception as e:
|
||
session.add(AICall(model=active_model(), status="error", error=str(e)[:500]))
|
||
log.warning("ind_summary.failed", group=group, error=str(e)[:120])
|
||
return False
|
||
|
||
cleaned = clean_summary(result.content)
|
||
if looks_like_leakage(cleaned) or len(cleaned) < 40:
|
||
# Model regurgitated the prompt or produced nothing usable.
|
||
# Don't persist — keep the last good summary visible. Log it so
|
||
# we can see the rate of failures over time.
|
||
log.warning("ind_summary.leakage_detected",
|
||
group=group, preview=cleaned[:120])
|
||
session.add(AICall(
|
||
model=result.model,
|
||
prompt_tokens=result.prompt_tokens,
|
||
completion_tokens=result.completion_tokens,
|
||
cost_usd=result.cost_usd,
|
||
status="leaked",
|
||
))
|
||
return False
|
||
|
||
session.add(IndicatorSummary(
|
||
group_name=group,
|
||
generated_at=utcnow(),
|
||
model=result.model,
|
||
tone=tone,
|
||
analysis=analysis,
|
||
prompt_version=PROMPT_VERSION,
|
||
content=cleaned,
|
||
prompt_tokens=result.prompt_tokens,
|
||
completion_tokens=result.completion_tokens,
|
||
cost_usd=result.cost_usd,
|
||
))
|
||
session.add(AICall(
|
||
model=result.model,
|
||
prompt_tokens=result.prompt_tokens,
|
||
completion_tokens=result.completion_tokens,
|
||
cost_usd=result.cost_usd,
|
||
status="ok",
|
||
))
|
||
return True
|
||
|
||
|
||
async def run() -> None:
|
||
async with job_lifecycle("indicator_summary_job") as (session, jr):
|
||
if jr.status == "skipped":
|
||
return
|
||
s = get_settings()
|
||
if not llm_configured():
|
||
log.warning("ind_summary.skipped_no_key", provider=s.LLM_PROVIDER)
|
||
jr.status = "skipped"
|
||
return
|
||
|
||
# Cadence — same policy as ai_log_job: hourly during EU/US active,
|
||
# throttled off-hours and weekends.
|
||
last_success = (await session.execute(
|
||
select(func.max(JobRun.finished_at)).where(
|
||
JobRun.name == "indicator_summary_job",
|
||
JobRun.status == "success",
|
||
)
|
||
)).scalar()
|
||
should_run, reason = DEFAULT_POLICY.should_run(last_success)
|
||
if not should_run:
|
||
log.info("ind_summary.cadence_skip", reason=reason)
|
||
jr.status = "skipped"
|
||
jr.error = reason
|
||
return
|
||
|
||
spent = await _month_spend(session)
|
||
if spent >= s.OPENROUTER_MONTHLY_CAP_USD:
|
||
jr.status = "skipped"
|
||
jr.error = f"monthly cap reached (${spent:.2f})"
|
||
return
|
||
|
||
groups = await _latest_quotes_by_group(session)
|
||
# Only summarise groups currently configured in TOML — drops stale
|
||
# group names (e.g. an old "pie" before T212 sourcing) that still have
|
||
# quotes in the table but no UI presence.
|
||
configured = set(load_groups(s.BASELINE_TOML, s.PORTFOLIO_TOML).keys())
|
||
groups = {g: q for g, q in groups.items() if g in configured}
|
||
if not groups:
|
||
jr.status = "skipped"
|
||
return
|
||
|
||
# Phase 2 voice pivot (PROMPT_VERSION 6): generate both tones each
|
||
# run so the dashboard toggle is instant. ANALYSIS stays on the
|
||
# operator-configured default.
|
||
analysis = (s.CASSANDRA_ANALYSIS or "SPECULATIVE").upper()
|
||
tones = ("NOVICE", "INTERMEDIATE")
|
||
|
||
written = 0
|
||
async with httpx.AsyncClient(follow_redirects=True) as client:
|
||
# Sequential rather than parallel — OpenRouter free tiers can
|
||
# throttle bursts; total work is small (~14-16 calls × ~5s each).
|
||
for tone in tones:
|
||
system_prompt = build_summary_system_prompt(tone, analysis)
|
||
for group, quotes in groups.items():
|
||
ok = await _generate_one(
|
||
session, client, group, quotes,
|
||
system_prompt, active_model(), tone, analysis,
|
||
)
|
||
if ok:
|
||
written += 1
|
||
await session.commit() # partial progress survives mid-job error
|
||
|
||
# One aggregate read across all groups, stored under __all__.
|
||
agg_system = build_aggregate_summary_system_prompt(tone, analysis)
|
||
agg_user = build_aggregate_summary_user_prompt(groups)
|
||
try:
|
||
result = await call_llm(
|
||
client,
|
||
[{"role": "system", "content": agg_system},
|
||
{"role": "user", "content": agg_user}],
|
||
max_tokens=1500, # room for reasoning + 80-word output
|
||
)
|
||
session.add(IndicatorSummary(
|
||
group_name=AGGREGATE_GROUP_NAME,
|
||
generated_at=utcnow(),
|
||
model=result.model,
|
||
tone=tone,
|
||
analysis=analysis,
|
||
prompt_version=PROMPT_VERSION,
|
||
content=clean_summary(result.content),
|
||
prompt_tokens=result.prompt_tokens,
|
||
completion_tokens=result.completion_tokens,
|
||
cost_usd=result.cost_usd,
|
||
))
|
||
session.add(AICall(
|
||
model=result.model,
|
||
prompt_tokens=result.prompt_tokens,
|
||
completion_tokens=result.completion_tokens,
|
||
cost_usd=result.cost_usd, status="ok",
|
||
))
|
||
written += 1
|
||
except Exception as e:
|
||
session.add(AICall(
|
||
model=active_model(), status="error",
|
||
error=f"{tone}/agg: {str(e)[:480]}",
|
||
))
|
||
log.warning("ind_summary.agg_failed",
|
||
tone=tone, error=str(e)[:120])
|
||
await session.commit()
|
||
|
||
jr.items_written = written
|
||
log.info("ind_summary.done",
|
||
groups=len(groups), tones=len(tones), written=written)
|
||
|
||
|
||
if __name__ == "__main__":
|
||
asyncio.run(run())
|