phase G: data minimisation + passwordless auth + DeepSeek-first LLM

Server no longer holds portfolios. Holdings live in the browser (localStorage); the server publishes an anonymous ticker_universe and a gzipped /api/universe payload identical for every authenticated user, so access patterns can't betray which tickers a user holds. AI commentary is generated ephemerally from the browser-supplied pie and the cost ledger row records no positions. Migrations 0009-0011 added the universe table and dropped positions / portfolio_snapshots / portfolios. Authentication is now e-mail OTP only. Migration 0010 dropped password_hash and email_verified (every active session is by construction proof of email control). The /signup endpoint is gone; signup and login share a single email-entry page. Email rendering is HTML+plain-text multipart with a shared brand palette (app/branding.py) asserted in sync with the CSS by a drift-detection test. LLM provider defaults to DeepSeek-direct (cheaper, api.deepseek.com) with OpenRouter as automatic fallback if DeepSeek fails. ai_log_job and indicator_summary_job now iterate the two tones (NOVICE, INTERMEDIATE) per cycle so the dashboard's tone toggle is instant; PROMPT_VERSION bumped to 6 with an educational anti-TA / anti-gambling stance baked into _CORE. NOVICE mode renders a curated glossary inline (CBOE VIX, yield curve, HY OAS, etc.) with JS-positioned tooltips that survive viewport edges and sticky bars. Model name and tokens hidden from the user UI; still recorded in StrategicLog.model and AICall for admin. Layout adds a sticky top nav, a sticky bottom markets bar (one chip per exchange with status LED + headline index + 1d change), and Phase H feedback reporting is queued in tasks/todo.md. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-18 14:16:57 +01:00 · 2026-05-18 14:16:57 +01:00 · 6e7f57c6b2
commit 6e7f57c6b2
parent 480fd311c5
54 changed files with 5005 additions and 916 deletions
--- a/app/jobs/ai_log_job.py
+++ b/app/jobs/ai_log_job.py
@ -17,9 +17,11 @@ from app.models import AICall, Headline, JobRun, Quote, StrategicLog
 from app.services.cadence import DEFAULT_POLICY
 from app.services.openrouter import (
    PROMPT_VERSION,
+    active_model,
    build_system_prompt,
    build_user_prompt,
-    call_openrouter,
+    call_llm,
+    llm_configured,
    month_start,
 )

@ -98,8 +100,8 @@ async def run() -> None:
        if jr.status == "skipped":
            return
        s = get_settings()
-        if not s.OPENROUTER_API_KEY:
-            log.warning("ai_log.skipped_no_key")
+        if not llm_configured():
+            log.warning("ai_log.skipped_no_key", provider=s.LLM_PROVIDER)
            jr.status = "skipped"
            return

@ -153,47 +155,71 @@ async def run() -> None:
            previous_log=previous_log,
        )

-        system_prompt = build_system_prompt(s.CASSANDRA_TONE, s.CASSANDRA_ANALYSIS)
-        try:
-            async with httpx.AsyncClient(follow_redirects=True) as client:
-                result = await call_openrouter(
-                    client,
-                    [{"role": "system", "content": system_prompt},
-                     {"role": "user", "content": user_prompt}],
-                    model=s.OPENROUTER_MODEL,
-                )
-        except Exception as e:
-            session.add(AICall(
-                model=s.OPENROUTER_MODEL, status="error", error=str(e)[:500],
-            ))
-            await session.commit()
-            raise
+        # Phase 2 voice pivot (PROMPT_VERSION 6): generate both tones per
+        # run so the dashboard toggle is instant. Analysis stays on the
+        # operator-configured default (DRY|SPECULATIVE is a system-wide
+        # preference, not a per-user toggle). PRO was dropped.
+        analysis = (s.CASSANDRA_ANALYSIS or "SPECULATIVE").upper()
+        variants = [
+            ("NOVICE", analysis),
+            ("INTERMEDIATE", analysis),
+        ]
+        written = 0
+        async with httpx.AsyncClient(follow_redirects=True) as client:
+            for tone, analysis in variants:
+                # Re-check cost cap between variants so a runaway run is
+                # bounded.
+                spent = await _month_spend(session)
+                if spent >= s.OPENROUTER_MONTHLY_CAP_USD:
+                    log.warning("ai_log.cap_reached_midrun",
+                                spent=spent, completed=written)
+                    break

-        session.add(StrategicLog(
-            generated_at=utcnow(),
-            model=result.model,
-            anchor_date=anchor,
-            prompt_version=PROMPT_VERSION,
-            tone=s.CASSANDRA_TONE.upper(),
-            analysis=s.CASSANDRA_ANALYSIS.upper(),
-            content=result.content,
-            prompt_tokens=result.prompt_tokens,
-            completion_tokens=result.completion_tokens,
-            cost_usd=result.cost_usd,
-        ))
-        session.add(AICall(
-            model=result.model,
-            prompt_tokens=result.prompt_tokens,
-            completion_tokens=result.completion_tokens,
-            cost_usd=result.cost_usd,
-            status="ok",
-        ))
-        await session.commit()
-        jr.items_written = 1
-        log.info("ai_log.done",
-                 model=result.model,
-                 prompt_tokens=result.prompt_tokens,
-                 completion_tokens=result.completion_tokens)
+                system_prompt = build_system_prompt(tone, analysis)
+                try:
+                    result = await call_llm(
+                        client,
+                        [{"role": "system", "content": system_prompt},
+                         {"role": "user", "content": user_prompt}],
+                    )
+                except Exception as e:
+                    session.add(AICall(
+                        model=active_model(), status="error",
+                        error=f"{tone}/{analysis}: {str(e)[:480]}",
+                    ))
+                    await session.commit()
+                    log.error("ai_log.variant_failed",
+                              tone=tone, analysis=analysis, error=str(e)[:200])
+                    continue
+
+                session.add(StrategicLog(
+                    generated_at=utcnow(),
+                    model=result.model,
+                    anchor_date=anchor,
+                    prompt_version=PROMPT_VERSION,
+                    tone=tone,
+                    analysis=analysis,
+                    content=result.content,
+                    prompt_tokens=result.prompt_tokens,
+                    completion_tokens=result.completion_tokens,
+                    cost_usd=result.cost_usd,
+                ))
+                session.add(AICall(
+                    model=result.model,
+                    prompt_tokens=result.prompt_tokens,
+                    completion_tokens=result.completion_tokens,
+                    cost_usd=result.cost_usd,
+                    status="ok",
+                ))
+                await session.commit()
+                written += 1
+                log.info("ai_log.variant_done",
+                         tone=tone, analysis=analysis,
+                         prompt_tokens=result.prompt_tokens,
+                         completion_tokens=result.completion_tokens)
+
+        jr.items_written = written
+        log.info("ai_log.done", variants=written, total=len(variants))


 if __name__ == "__main__":
--- a/app/jobs/indicator_summary_job.py
+++ b/app/jobs/indicator_summary_job.py
@ -17,11 +17,13 @@ from app.models import AICall, IndicatorSummary, JobRun, Quote
 from app.services.cadence import DEFAULT_POLICY
 from app.services.openrouter import (
    PROMPT_VERSION,
+    active_model,
    build_aggregate_summary_system_prompt,
    build_aggregate_summary_user_prompt,
    build_summary_system_prompt,
    build_summary_user_prompt,
-    call_openrouter,
+    call_llm,
+    llm_configured,
    month_start,
 )

@ -173,18 +175,19 @@ async def _generate_one(
    session, client: httpx.AsyncClient, group: str, quotes: list[dict],
    system_prompt: str, model: str, tone: str, analysis: str,
 ) -> bool:
-    """Generate + persist one group's summary. Returns True on success."""
+    """Generate + persist one group's summary. Returns True on success.
+    `model` is retained for ledger labelling but call_llm now picks the
+    active-provider model itself."""
    user_prompt = build_summary_user_prompt(group, quotes)
    try:
-        result = await call_openrouter(
+        result = await call_llm(
            client,
            [{"role": "system", "content": system_prompt},
             {"role": "user",   "content": user_prompt}],
-            model=model,
            max_tokens=800,  # DeepSeek sometimes spends 300+ on internal reasoning
        )
    except Exception as e:
-        session.add(AICall(model=model, status="error", error=str(e)[:500]))
+        session.add(AICall(model=active_model(), status="error", error=str(e)[:500]))
        log.warning("ind_summary.failed", group=group, error=str(e)[:120])
        return False

@ -231,7 +234,8 @@ async def run() -> None:
        if jr.status == "skipped":
            return
        s = get_settings()
-        if not s.OPENROUTER_API_KEY:
+        if not llm_configured():
+            log.warning("ind_summary.skipped_no_key", provider=s.LLM_PROVIDER)
            jr.status = "skipped"
            return

@ -266,62 +270,68 @@ async def run() -> None:
            jr.status = "skipped"
            return

-        tone = s.CASSANDRA_TONE.upper()
-        analysis = s.CASSANDRA_ANALYSIS.upper()
-        system_prompt = build_summary_system_prompt(tone, analysis)
+        # Phase 2 voice pivot (PROMPT_VERSION 6): generate both tones each
+        # run so the dashboard toggle is instant. ANALYSIS stays on the
+        # operator-configured default.
+        analysis = (s.CASSANDRA_ANALYSIS or "SPECULATIVE").upper()
+        tones = ("NOVICE", "INTERMEDIATE")

        written = 0
        async with httpx.AsyncClient(follow_redirects=True) as client:
            # Sequential rather than parallel — OpenRouter free tiers can
-            # throttle bursts; total work is small (~12 calls × ~5s each).
-            for group, quotes in groups.items():
-                ok = await _generate_one(
-                    session, client, group, quotes,
-                    system_prompt, s.OPENROUTER_MODEL, tone, analysis,
-                )
-                if ok:
-                    written += 1
-                await session.commit()  # partial progress survives mid-job error
+            # throttle bursts; total work is small (~14-16 calls × ~5s each).
+            for tone in tones:
+                system_prompt = build_summary_system_prompt(tone, analysis)
+                for group, quotes in groups.items():
+                    ok = await _generate_one(
+                        session, client, group, quotes,
+                        system_prompt, active_model(), tone, analysis,
+                    )
+                    if ok:
+                        written += 1
+                    await session.commit()  # partial progress survives mid-job error

-            # One aggregate read across all groups, stored under __all__.
-            agg_system = build_aggregate_summary_system_prompt(tone, analysis)
-            agg_user = build_aggregate_summary_user_prompt(groups)
-            try:
-                result = await call_openrouter(
-                    client,
-                    [{"role": "system", "content": agg_system},
-                     {"role": "user", "content": agg_user}],
-                    model=s.OPENROUTER_MODEL,
-                    max_tokens=1500,  # room for reasoning + 80-word output
-                )
-                session.add(IndicatorSummary(
-                    group_name=AGGREGATE_GROUP_NAME,
-                    generated_at=utcnow(),
-                    model=result.model,
-                    tone=tone,
-                    analysis=analysis,
-                    prompt_version=PROMPT_VERSION,
-                    content=clean_summary(result.content),
-                    prompt_tokens=result.prompt_tokens,
-                    completion_tokens=result.completion_tokens,
-                    cost_usd=result.cost_usd,
-                ))
-                session.add(AICall(
-                    model=result.model,
-                    prompt_tokens=result.prompt_tokens,
-                    completion_tokens=result.completion_tokens,
-                    cost_usd=result.cost_usd, status="ok",
-                ))
-                written += 1
-            except Exception as e:
-                session.add(AICall(
-                    model=s.OPENROUTER_MODEL, status="error", error=str(e)[:500],
-                ))
-                log.warning("ind_summary.agg_failed", error=str(e)[:120])
-            await session.commit()
+                # One aggregate read across all groups, stored under __all__.
+                agg_system = build_aggregate_summary_system_prompt(tone, analysis)
+                agg_user = build_aggregate_summary_user_prompt(groups)
+                try:
+                    result = await call_llm(
+                        client,
+                        [{"role": "system", "content": agg_system},
+                         {"role": "user", "content": agg_user}],
+                        max_tokens=1500,  # room for reasoning + 80-word output
+                    )
+                    session.add(IndicatorSummary(
+                        group_name=AGGREGATE_GROUP_NAME,
+                        generated_at=utcnow(),
+                        model=result.model,
+                        tone=tone,
+                        analysis=analysis,
+                        prompt_version=PROMPT_VERSION,
+                        content=clean_summary(result.content),
+                        prompt_tokens=result.prompt_tokens,
+                        completion_tokens=result.completion_tokens,
+                        cost_usd=result.cost_usd,
+                    ))
+                    session.add(AICall(
+                        model=result.model,
+                        prompt_tokens=result.prompt_tokens,
+                        completion_tokens=result.completion_tokens,
+                        cost_usd=result.cost_usd, status="ok",
+                    ))
+                    written += 1
+                except Exception as e:
+                    session.add(AICall(
+                        model=active_model(), status="error",
+                        error=f"{tone}/agg: {str(e)[:480]}",
+                    ))
+                    log.warning("ind_summary.agg_failed",
+                                tone=tone, error=str(e)[:120])
+                await session.commit()

        jr.items_written = written
-        log.info("ind_summary.done", groups=len(groups), written=written)
+        log.info("ind_summary.done",
+                 groups=len(groups), tones=len(tones), written=written)


 if __name__ == "__main__":
--- a/app/jobs/market_job.py
+++ b/app/jobs/market_job.py
@ -1,5 +1,6 @@
-"""Hourly market ingestion: fetch every (symbol, group) defined in TOML and
-insert one Quote row per fetch."""
+"""Hourly market ingestion: fetch every (symbol, group) defined in TOML
+*plus* every ticker in the Phase G shared ticker_universe, inserting one
+Quote row per fetch."""
 from __future__ import annotations

 import asyncio
@ -11,6 +12,7 @@ from app.db import utcnow
 from app.jobs._helpers import job_lifecycle, log
 from app.models import Quote
 from app.services.market import fetch
+from app.services.ticker_universe import get_all_tickers


 async def run() -> None:
@ -21,11 +23,27 @@ async def run() -> None:
        groups = load_groups(s.BASELINE_TOML, s.PORTFOLIO_TOML)
        anchor = s.CASSANDRA_ANCHOR_DATE or None

+        # Build the (group, symbol, label, note) work list from config TOML.
+        items_flat: list[tuple[str, str, str, str]] = [
+            (group, sym, lab, note)
+            for group, items in groups.items()
+            for sym, lab, note in items
+        ]
+        configured_syms = {sym for _, sym, _, _ in items_flat}
+
+        # Phase G: extend with anything in ticker_universe that isn't
+        # already covered by config. These land under group_name="universe"
+        # — the /api/universe endpoint reads the latest quote per symbol
+        # regardless of group.
+        universe_tickers = await get_all_tickers(session)
+        for t in universe_tickers:
+            if t not in configured_syms:
+                items_flat.append(("universe", t, t, ""))
+
        async with httpx.AsyncClient(follow_redirects=True) as client:
            tasks = [
                fetch(client, sym, lab, note, anchor)
-                for group, items in groups.items()
-                for sym, lab, note in items
+                for _, sym, lab, note in items_flat
            ]
            # Run in parallel but bounded — Yahoo can throttle if we hammer.
            sem = asyncio.Semaphore(16)
@ -34,14 +52,8 @@ async def run() -> None:
                    return await t
            quotes = await asyncio.gather(*(bounded(t) for t in tasks))

-        # Re-index quotes back to their group for persistence.
-        items_flat = [
-            (group, sym)
-            for group, items in groups.items()
-            for sym, _, _ in items
-        ]
        now = utcnow()
-        for (group, _sym), q in zip(items_flat, quotes):
+        for (group, _sym, _lab, _note), q in zip(items_flat, quotes):
            session.add(Quote(
                symbol=q.symbol,
                source=q.source,
@ -58,7 +70,12 @@ async def run() -> None:
            ))
        await session.commit()
        run.items_written = len(quotes)
-        log.info("market_job.done", count=len(quotes))
+        log.info(
+            "market_job.done",
+            count=len(quotes),
+            configured=len(configured_syms),
+            universe=len(universe_tickers),
+        )


 if __name__ == "__main__":
--- a/app/jobs/news_job.py
+++ b/app/jobs/news_job.py
@ -11,7 +11,7 @@ from sqlalchemy.dialects.mysql import insert as mysql_insert

 from app.db import utcnow
 from app.jobs._helpers import job_lifecycle, log
-from app.models import Feed, Headline, Portfolio, PortfolioSnapshot, Position
+from app.models import Feed, Headline, InstrumentMap, TickerUniverse
 from app.services.news import dedupe, fetch_feed, fetch_yahoo_news


@ -42,20 +42,20 @@ async def run() -> None:
            await session.execute(select(Feed).where(Feed.enabled == True))
        ).scalars().all()

-        # Portfolio tickers + names now come from the latest T212 snapshot,
-        # not from TOML. The (ticker, name) pair lets fetch_yahoo_news skip
-        # the chart-meta round-trip and use the proper company name directly.
-        latest_snap_id = (await session.execute(
-            select(PortfolioSnapshot.id)
-            .order_by(desc(PortfolioSnapshot.snapshot_at))
-            .limit(1)
-        )).scalar_one_or_none()
+        # Per-ticker news: pull every Yahoo ticker in the anonymous
+        # universe (Phase G), pair each with its display name from
+        # instrument_map when available. No per-user attribution.
+        uni_tickers = (await session.execute(
+            select(TickerUniverse.yahoo_ticker)
+        )).scalars().all()
        ticker_pairs: list[tuple[str, str]] = []
-        if latest_snap_id is not None:
-            positions = (await session.execute(
-                select(Position).where(Position.snapshot_id == latest_snap_id)
-            )).scalars().all()
-            ticker_pairs = [(p.ticker, p.name or p.ticker) for p in positions]
+        if uni_tickers:
+            name_rows = (await session.execute(
+                select(InstrumentMap.yahoo_ticker, InstrumentMap.name)
+                .where(InstrumentMap.yahoo_ticker.in_(uni_tickers))
+            )).all()
+            names = {y: n for y, n in name_rows if y}
+            ticker_pairs = [(t, names.get(t) or t) for t in uni_tickers]

        async with httpx.AsyncClient(follow_redirects=True) as client:
            feed_results = await asyncio.gather(
--- a/app/jobs/portfolio_job.py
+++ b/app/jobs/portfolio_job.py
@ -1,90 +0,0 @@
-"""Hourly Trading 212 snapshot. One Portfolio row per portfolio name
-(currently just 'pie'); one PortfolioSnapshot per run; N Position rows."""
-from __future__ import annotations
-
-import asyncio
-
-import httpx
-from sqlalchemy import select
-
-from app.config import get_settings
-from app.db import utcnow
-from app.jobs._helpers import job_lifecycle, log
-from app.models import Portfolio, PortfolioSnapshot, Position
-from app.services.trading212 import Trading212
-
-
-PORTFOLIO_NAME = "pie"  # only one for now; multi-portfolio extension is schema-ready
-
-
-async def run() -> None:
-    async with job_lifecycle("portfolio_job") as (session, jr):
-        if jr.status == "skipped":
-            return
-        s = get_settings()
-        if not (s.API_KEY and s.SECRET_KEY):
-            log.warning("portfolio_job.skipped_no_creds")
-            jr.status = "skipped"
-            return
-
-        t212 = Trading212()
-        async with httpx.AsyncClient(follow_redirects=True) as client:
-            summary = await t212.summary(client)
-            positions = await t212.positions(client)
-            # The instruments call is heavy (~5 MB / 17k rows) but it's our
-            # only path to a human-readable name per ticker. Once per hour is
-            # fine; later we could cache to disk.
-            try:
-                instruments = await t212.instruments(client)
-                name_by_ticker = {
-                    i["ticker"]: i.get("name") or i.get("shortName") or i["ticker"]
-                    for i in (instruments or [])
-                }
-            except Exception:
-                name_by_ticker = {}
-
-        portfolio = (
-            await session.execute(
-                select(Portfolio).where(Portfolio.name == PORTFOLIO_NAME)
-            )
-        ).scalar_one_or_none()
-        if portfolio is None:
-            portfolio = Portfolio(
-                name=PORTFOLIO_NAME, source="trading212",
-                currency=summary.get("currency", "GBP"),
-            )
-            session.add(portfolio)
-            await session.flush()  # need id for FK
-
-        cash = (summary.get("cash") or {})
-        investments = (summary.get("investments") or {})
-        snap = PortfolioSnapshot(
-            portfolio_id=portfolio.id,
-            snapshot_at=utcnow(),
-            total_value=summary.get("totalValue"),
-            cash=cash.get("availableToTrade"),
-            invested=investments.get("currentValue"),
-            raw_json=summary,
-        )
-        session.add(snap)
-        await session.flush()
-
-        for p in positions or []:
-            tkr = p.get("ticker", "")
-            session.add(Position(
-                snapshot_id=snap.id,
-                ticker=tkr,
-                name=name_by_ticker.get(tkr),
-                quantity=p.get("quantity"),
-                average_price=p.get("averagePrice"),
-                current_price=p.get("currentPrice"),
-                ppl=p.get("ppl"),
-            ))
-
-        await session.commit()
-        jr.items_written = len(positions or []) + 1
-        log.info("portfolio_job.done", positions=len(positions or []))
-
-
-if __name__ == "__main__":
-    asyncio.run(run())
--- a/app/jobs/universe_flush_job.py
+++ b/app/jobs/universe_flush_job.py
@ -0,0 +1,43 @@
+"""Flush the ticker_universe Redis buffer into the DB at 5-min boundaries.
+
+The buffer is keyed by 5-minute wall-clock buckets:
+`ticker_universe:buffer:<bucket_ts>`. This job runs slightly after each
+boundary and reads the *previous* bucket, ensuring it's closed (no new
+writes can land in it). New tickers are inserted into `ticker_universe`;
+already-known ones have their `last_referenced_at` bumped.
+
+The lag between bucket-close and flush is intentional: it batches
+multiple users' uploads into one INSERT, making timing-correlation
+between "user uploaded at T" and "ticker XYZ appeared at T+δ" weaker.
+"""
+from __future__ import annotations
+
+import asyncio
+
+from app.jobs._helpers import job_lifecycle, log
+from app.services.ticker_universe import evict_stale, flush_buffer
+
+
+async def run() -> None:
+    async with job_lifecycle("universe_flush_job") as (session, run):
+        if run.status == "skipped":
+            return
+        out = await flush_buffer(session)
+        run.items_written = out.get("inserted", 0)
+        log.info("universe_flush.done", **out)
+
+
+async def evict_run() -> None:
+    """Separate daily run: prune entries that haven't been referenced
+    within the eviction TTL (60 days). Kept in this module so all
+    universe-maintenance lives in one place."""
+    async with job_lifecycle("universe_evict_job") as (session, run):
+        if run.status == "skipped":
+            return
+        deleted = await evict_stale(session)
+        run.items_written = deleted
+        log.info("universe_evict.done", deleted=deleted)
+
+
+if __name__ == "__main__":
+    asyncio.run(run())