add Eurostat + UK ONS sources; valuation/bubble/economy/bonds groups; aggregate read; market-open header

Three new data sources hooked into the existing SOURCES registry. All open APIs, no keys: - EUROSTAT: prefix EUROSTAT:dataset?dim=val&... — current EU bond yields (Bund/OAT/BTP/EZ) and Eurozone economic indicators that FRED's OECD-mirror series stopped updating in 2022-2023. - ONS: prefix ONS:topic/cdid/dataset — current UK CPI, unemployment, GDP, industrial production. Replaces the 5+ month-stale FRED LRHUTTTTGBM156S mirror. New indicator groups in default.toml feed the strategic/fundamental lens we converged on: valuation (CAPE/Buffett anchors), bubble_watch (SKEW/VVIX/RSP vs SPY/HYG vs TLT/IPO/crypto), economy (multi-region, ALL current-or-stale-flagged), bonds (UK/EU/US/JPN sovereign yields). Indicator panel now opens with an AI "read" interpretation per group (generated hourly at :07 UTC alongside an aggregate cross-group read shown in the dashboard header). The aggregate is grounded by a markets strip — NYSE/LSE/Frankfurt/Tokyo/HK/Shanghai with open/closed LEDs and next-open countdown, computed locally from each exchange's tz. Other UX bits: indicator-row tooltips populated from TOML notes; rows whose last observation is >90 days old get a 'stale' chip; ghost symbols (in DB but no longer in TOML) filtered out of the panel; Eurostat/ONS symbols display as short codes rather than the full API path. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-15 23:07:42 +01:00 · 2026-05-15 23:07:42 +01:00 · 1edf9cad41
commit 1edf9cad41
parent a10409c02b
15 changed files with 1156 additions and 10 deletions
--- a/app/jobs/indicator_summary_job.py
+++ b/app/jobs/indicator_summary_job.py
@ -0,0 +1,237 @@
+"""Hourly per-group indicator summaries — a short AI read at the top of each
+Indicators tab. Costs ~$0.0003 per call on DeepSeek V4 Flash, so 10+ groups
+hourly stays comfortably under the monthly cap."""
+from __future__ import annotations
+
+import asyncio
+import re
+from collections import defaultdict
+
+import httpx
+from sqlalchemy import desc, func, select
+
+from app.config import get_settings, load_groups
+from app.db import utcnow
+from app.jobs._helpers import job_lifecycle, log
+from app.models import AICall, IndicatorSummary, Quote
+from app.services.openrouter import (
+    PROMPT_VERSION,
+    build_aggregate_summary_system_prompt,
+    build_aggregate_summary_user_prompt,
+    build_summary_system_prompt,
+    build_summary_user_prompt,
+    call_openrouter,
+    month_start,
+)
+
+
+AGGREGATE_GROUP_NAME = "__all__"
+
+
+# Strip known meta-commentary openers the model sometimes leaks despite the
+# prompt's hard constraints. Each pattern matches one leading sentence.
+_LEAK_PATTERNS = [
+    re.compile(p, re.IGNORECASE | re.DOTALL)
+    for p in (
+        # First-person meta — "I need to / I'll / I have to / I'm going to ..."
+        r"^i\s+(?:need|have|must|should|am going|'ll|will|shall|can|am)[^.]*\.\s*",
+        # "We need / we're / we are asked / we will ..."
+        r"^we\s+(?:need|are|'re|will|shall|can|should|must|have)[^.]*\.\s*",
+        r"^let\s+(?:me|us|'?s)[^.]*\.\s*",
+        r"^here['’]s[^.]*\.\s*",
+        r"^sure[,!]?\s[^.]*\.\s*",
+        r"^looking at[^.]*\.\s*",
+        r"^based on[^.]*\.\s*",
+        r"^to (?:address|answer|write|summarise|summarize)[^.]*\.\s*",
+        r"^first[,]?\s[^.]*\.\s*",
+        r"^the (?:user|data shows|reader|task|request)[^.]*\.\s*",
+        r"^summary[:.]\s*",
+        r"^okay[,]?\s+",
+        r"^alright[,]?\s+",
+        r"^thinking[^.]*\.\s*",
+    )
+]
+
+
+def clean_summary(text: str) -> str:
+    """Strip leading meta-commentary. If cleaning removes nearly everything
+    (suggesting the model emitted reasoning then ran out of tokens), fall
+    back to the last non-empty paragraph of the raw output — that's usually
+    where the actual answer ended up."""
+    raw = text.strip()
+    out = raw
+    for _ in range(2):
+        before = out
+        for pat in _LEAK_PATTERNS:
+            out = pat.sub("", out, count=1).lstrip()
+        if out == before:
+            break
+    if len(out) < 60 and len(raw) > 120:
+        # Cleaning ate too much; take the last non-empty paragraph of raw.
+        paragraphs = [p.strip() for p in re.split(r"\n\s*\n", raw) if p.strip()]
+        if paragraphs:
+            out = paragraphs[-1]
+    return out
+
+
+async def _latest_quotes_by_group(session) -> dict[str, list[dict]]:
+    """Latest non-null quote per (group, symbol). Drops error rows."""
+    sub = (
+        select(Quote.group_name, Quote.symbol,
+               func.max(Quote.fetched_at).label("mx"))
+        .group_by(Quote.group_name, Quote.symbol)
+        .subquery()
+    )
+    rows = (await session.execute(
+        select(Quote).join(
+            sub,
+            (Quote.group_name == sub.c.group_name)
+            & (Quote.symbol == sub.c.symbol)
+            & (Quote.fetched_at == sub.c.mx),
+        ).where(Quote.price.is_not(None))
+        .order_by(Quote.group_name, Quote.symbol)
+    )).scalars().all()
+    by_group: dict[str, list[dict]] = defaultdict(list)
+    for q in rows:
+        by_group[q.group_name].append({
+            "symbol": q.symbol, "label": q.label,
+            "price": q.price, "currency": q.currency,
+            "as_of": q.as_of, "changes": q.changes,
+        })
+    return by_group
+
+
+async def _month_spend(session) -> float:
+    total = (await session.execute(
+        select(func.coalesce(func.sum(AICall.cost_usd), 0.0))
+        .where(AICall.called_at >= month_start())
+    )).scalar()
+    return float(total or 0.0)
+
+
+async def _generate_one(
+    session, client: httpx.AsyncClient, group: str, quotes: list[dict],
+    system_prompt: str, model: str, tone: str, analysis: str,
+) -> bool:
+    """Generate + persist one group's summary. Returns True on success."""
+    user_prompt = build_summary_user_prompt(group, quotes)
+    try:
+        result = await call_openrouter(
+            client,
+            [{"role": "system", "content": system_prompt},
+             {"role": "user",   "content": user_prompt}],
+            model=model,
+            max_tokens=800,  # DeepSeek sometimes spends 300+ on internal reasoning
+        )
+    except Exception as e:
+        session.add(AICall(model=model, status="error", error=str(e)[:500]))
+        log.warning("ind_summary.failed", group=group, error=str(e)[:120])
+        return False
+
+    session.add(IndicatorSummary(
+        group_name=group,
+        generated_at=utcnow(),
+        model=result.model,
+        tone=tone,
+        analysis=analysis,
+        prompt_version=PROMPT_VERSION,
+        content=clean_summary(result.content),
+        prompt_tokens=result.prompt_tokens,
+        completion_tokens=result.completion_tokens,
+        cost_usd=result.cost_usd,
+    ))
+    session.add(AICall(
+        model=result.model,
+        prompt_tokens=result.prompt_tokens,
+        completion_tokens=result.completion_tokens,
+        cost_usd=result.cost_usd,
+        status="ok",
+    ))
+    return True
+
+
+async def run() -> None:
+    async with job_lifecycle("indicator_summary_job") as (session, jr):
+        if jr.status == "skipped":
+            return
+        s = get_settings()
+        if not s.OPENROUTER_API_KEY:
+            jr.status = "skipped"
+            return
+
+        spent = await _month_spend(session)
+        if spent >= s.OPENROUTER_MONTHLY_CAP_USD:
+            jr.status = "skipped"
+            jr.error = f"monthly cap reached (${spent:.2f})"
+            return
+
+        groups = await _latest_quotes_by_group(session)
+        # Only summarise groups currently configured in TOML — drops stale
+        # group names (e.g. an old "pie" before T212 sourcing) that still have
+        # quotes in the table but no UI presence.
+        configured = set(load_groups(s.BASELINE_TOML, s.PORTFOLIO_TOML).keys())
+        groups = {g: q for g, q in groups.items() if g in configured}
+        if not groups:
+            jr.status = "skipped"
+            return
+
+        tone = s.CASSANDRA_TONE.upper()
+        analysis = s.CASSANDRA_ANALYSIS.upper()
+        system_prompt = build_summary_system_prompt(tone, analysis)
+
+        written = 0
+        async with httpx.AsyncClient(follow_redirects=True) as client:
+            # Sequential rather than parallel — OpenRouter free tiers can
+            # throttle bursts; total work is small (~12 calls × ~5s each).
+            for group, quotes in groups.items():
+                ok = await _generate_one(
+                    session, client, group, quotes,
+                    system_prompt, s.OPENROUTER_MODEL, tone, analysis,
+                )
+                if ok:
+                    written += 1
+                await session.commit()  # partial progress survives mid-job error
+
+            # One aggregate read across all groups, stored under __all__.
+            agg_system = build_aggregate_summary_system_prompt(tone, analysis)
+            agg_user = build_aggregate_summary_user_prompt(groups)
+            try:
+                result = await call_openrouter(
+                    client,
+                    [{"role": "system", "content": agg_system},
+                     {"role": "user", "content": agg_user}],
+                    model=s.OPENROUTER_MODEL,
+                    max_tokens=1500,  # room for reasoning + 80-word output
+                )
+                session.add(IndicatorSummary(
+                    group_name=AGGREGATE_GROUP_NAME,
+                    generated_at=utcnow(),
+                    model=result.model,
+                    tone=tone,
+                    analysis=analysis,
+                    prompt_version=PROMPT_VERSION,
+                    content=clean_summary(result.content),
+                    prompt_tokens=result.prompt_tokens,
+                    completion_tokens=result.completion_tokens,
+                    cost_usd=result.cost_usd,
+                ))
+                session.add(AICall(
+                    model=result.model,
+                    prompt_tokens=result.prompt_tokens,
+                    completion_tokens=result.completion_tokens,
+                    cost_usd=result.cost_usd, status="ok",
+                ))
+                written += 1
+            except Exception as e:
+                session.add(AICall(
+                    model=s.OPENROUTER_MODEL, status="error", error=str(e)[:500],
+                ))
+                log.warning("ind_summary.agg_failed", error=str(e)[:120])
+            await session.commit()
+
+        jr.items_written = written
+        log.info("ind_summary.done", groups=len(groups), written=written)
+
+
+if __name__ == "__main__":
+    asyncio.run(run())