initial commit — cassandra v0.1

Containerised macro-strategy dashboard: 4-panel web UI (indicators, portfolio, flash news, AI strategic log), MariaDB store, hourly ingestion jobs, OpenRouter-backed AI analysis. Ports the four prototype scripts in the parent dir (market_pulse, flash_news, trading212, strategic_log) into async services backed by a persistent DB and served via FastAPI + Jinja2 + HTMX. APScheduler runs as a separate compose service for crash-safety and easier restarts. Portfolio composition + position names come live from Trading 212; per-ticker news headlines reuse those names. Tone (NOVICE/INTERMEDIATE/PRO) and analysis style (DRY/SPECULATIVE) are env-configurable and stored on each log row so historical entries show what produced them. Default model is deepseek/deepseek-v4-flash (overridable via env). Light/dark theme toggle, sans-serif for prose surfaces, monospace for data. Bearer-token auth, OpenRouter monthly cost cap, RSS feeds auto-disabled on consecutive failures. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-15 21:56:10 +01:00 · 2026-05-15 21:56:10 +01:00 · a10409c02b
commit a10409c02b
61 changed files with 4890 additions and 0 deletions
--- a/app/services/news.py
+++ b/app/services/news.py
@ -0,0 +1,167 @@
+"""RSS feed aggregator + Yahoo per-ticker news.
+
+Ported from /home/gg/ownCloud/Family/Finances/Wealth/flash_news.py — same
+parsing, dedupe, and ticker-name resolution logic, async HTTP via httpx.
+"""
+from __future__ import annotations
+
+import hashlib
+import re
+from dataclasses import dataclass
+from datetime import datetime, timezone
+from email.utils import parsedate_to_datetime
+from xml.etree import ElementTree as ET
+
+import httpx
+
+
+# User-Agent header passed with every httpx request made in this module.
+UA = {"User-Agent": "Mozilla/5.0 (cassandra) Python/httpx"}
+# ElementTree "{uri}" tag prefixes: Atom elements, and Dublin Core (parse_feed
+# falls back to <dc:date> when an RSS item has no <pubDate>).
+ATOM_NS = "{http://www.w3.org/2005/Atom}"
+DC_NS = "{http://purl.org/dc/elements/1.1/}"
+# Yahoo Finance endpoints: search (queried for news by fetch_yahoo_news) and
+# chart (its "meta" object is read for longName/shortName by
+# _resolve_ticker_name).
+YAHOO_NEWS = "https://query1.finance.yahoo.com/v1/finance/search"
+YAHOO_CHART = "https://query1.finance.yahoo.com/v8/finance/chart/{symbol}"
+
+# Lower-case words dropped from an instrument name before its remaining tokens
+# are matched against headline titles in fetch_yahoo_news.
+_NAME_STOPWORDS = {"plc", "corp", "inc", "ltd", "fund", "etf", "ucits",
+                   "class", "shares", "trust", "the", "and", "of"}
+
+
+@dataclass
+class Headline:
+    """One news item, as produced by the feed parser or the Yahoo search."""
+
+    when: datetime  # publication time, tz-aware UTC
+    source: str  # feed name, or "Yahoo:<ticker>" for per-ticker results
+    category: str  # feed category, or "ticker" for per-ticker results
+    title: str
+    url: str
+
+    @property
+    def fingerprint(self) -> str:
+        """Return the SHA-1 hex digest of the normalised title.
+
+        Normalisation lower-cases the title and collapses every run of
+        whitespace into a single space, so cosmetic differences between
+        sources hash identically. Used as the DB UNIQUE key.
+        """
+        words = self.title.lower().split()
+        digest = hashlib.sha1(" ".join(words).encode("utf-8"))
+        return digest.hexdigest()
+
+
+def _parse_date(s: str | None) -> datetime | None:
+    """Parse a feed timestamp into a tz-aware UTC datetime.
+
+    The RFC 2822 form used by RSS ``pubDate`` is tried first, then ISO 8601
+    (with a trailing ``Z`` accepted) as used by Atom and ``dc:date``.
+
+    Args:
+        s: Raw timestamp text, possibly ``None`` or empty.
+
+    Returns:
+        The timestamp converted to UTC, or ``None`` when ``s`` is empty or
+        matches neither format. Timestamps that carry no usable offset
+        (including RFC 2822 ``-0000``) are interpreted as UTC.
+    """
+    if not s:
+        return None
+    # Feed text nodes are often padded with whitespace or newlines, which
+    # datetime.fromisoformat() rejects.
+    text = s.strip()
+    try:
+        parsed = parsedate_to_datetime(text)
+    except (TypeError, ValueError):
+        try:
+            parsed = datetime.fromisoformat(text.replace("Z", "+00:00"))
+        except ValueError:
+            return None
+    if parsed.tzinfo is None:
+        # astimezone() on a naive datetime assumes the host's local zone,
+        # which would shift the result on any machine not running in UTC.
+        return parsed.replace(tzinfo=timezone.utc)
+    return parsed.astimezone(timezone.utc)
+
+
+def _atom_link(entry: ET.Element) -> str:
+    """Return the article URL of an Atom entry, or "" when it has none."""
+    fallback = ""
+    for link_el in entry.findall(f"{ATOM_NS}link"):
+        href = (link_el.get("href") or "").strip()
+        if not href:
+            continue
+        # Per RFC 4287 a link without a rel attribute is an "alternate" link,
+        # i.e. the article itself rather than the feed ("self") or media.
+        if link_el.get("rel", "alternate") == "alternate":
+            return href
+        fallback = fallback or href
+    return fallback
+
+
+def parse_feed(name: str, category: str, xml_bytes: bytes) -> list[Headline]:
+    """Parse an RSS or Atom document into headlines.
+
+    Args:
+        name: Feed name, stored as each headline's ``source``.
+        category: Feed category, stored on each headline.
+        xml_bytes: Raw response body of the feed.
+
+    Returns:
+        One ``Headline`` per item/entry that has both a title and a link, in
+        document order. Malformed XML yields an empty list. An item whose
+        date is missing or unparseable is stamped with the current UTC time.
+    """
+    try:
+        # NOTE(security): feeds are remote input and xml.etree.ElementTree is
+        # not hardened against maliciously constructed XML (e.g. entity
+        # expansion); consider defusedxml if untrusted feeds are added.
+        root = ET.fromstring(xml_bytes)
+    except ET.ParseError:
+        return []
+    out: list[Headline] = []
+    rss_items = root.findall(".//item")
+    if rss_items:
+        for it in rss_items:
+            title = (it.findtext("title") or "").strip()
+            link = (it.findtext("link") or "").strip()
+            if not (title and link):
+                continue
+            # Some RSS feeds carry the date in Dublin Core instead of pubDate.
+            pub = it.findtext("pubDate") or it.findtext(f"{DC_NS}date")
+            when = _parse_date(pub) or datetime.now(timezone.utc)
+            out.append(Headline(when, name, category, title, link))
+        return out
+    # No RSS <item> anywhere in the tree: treat the document as Atom.
+    for entry in root.findall(f".//{ATOM_NS}entry"):
+        title = (entry.findtext(f"{ATOM_NS}title") or "").strip()
+        link = _atom_link(entry)
+        if not (title and link):
+            continue
+        pub = entry.findtext(f"{ATOM_NS}published") or entry.findtext(f"{ATOM_NS}updated")
+        when = _parse_date(pub) or datetime.now(timezone.utc)
+        out.append(Headline(when, name, category, title, link))
+    return out
+
+
+async def fetch_feed(
+    client: httpx.AsyncClient, name: str, category: str, url: str
+) -> list[Headline]:
+    """Download one feed and parse it into headlines.
+
+    Nothing is caught here: a transport error or an unsuccessful HTTP status
+    propagates as the httpx exception, leaving logging and failure handling
+    to the caller. A response that downloads fine but is not well-formed XML
+    comes back from parse_feed as an empty list.
+    """
+    response = await client.get(url, headers=UA, timeout=12)
+    response.raise_for_status()
+    body = response.content
+    return parse_feed(name, category, body)
+
+
+async def _resolve_ticker_name(client: httpx.AsyncClient, ticker: str) -> str:
+    """Return a company name for `ticker`, falling back to the ticker itself.
+
+    The name is read from the Yahoo chart metadata (``longName``, else
+    ``shortName``) so that a news search can hit headlines that mention the
+    company instead of matching the literal ticker string. This is
+    best-effort: any failure along the way returns `ticker` unchanged.
+    """
+    try:
+        response = await client.get(
+            YAHOO_CHART.format(symbol=ticker),
+            headers=UA,
+            params={"interval": "1d", "range": "5d"},
+            timeout=8,
+        )
+        response.raise_for_status()
+        payload = response.json()
+        meta = payload["chart"]["result"][0]["meta"]
+        for key in ("longName", "shortName"):
+            name = meta.get(key)
+            if name:
+                return name
+        return ticker
+    except Exception:
+        return ticker
+
+
+async def fetch_yahoo_news(
+    client: httpx.AsyncClient,
+    ticker: str,
+    count: int = 10,
+    query_override: str | None = None,
+) -> list[Headline]:
+    """Fetch Yahoo search headlines for one ticker, filtered by name tokens.
+
+    The search query is `query_override` when given (e.g. a name already
+    fetched from Trading 212 instruments, which saves the Yahoo chart-meta
+    round-trip), otherwise the name resolved from the ticker. The query is
+    split on whitespace and dots; pieces of three or more characters that
+    are not stopwords become tokens, and a headline is kept only if its
+    title contains at least one token. This token-overlap guard is why niche
+    UCITS ETFs return empty rather than the generic firehose. With no
+    tokens left, nothing is filtered.
+
+    Any failure while requesting or decoding the results returns an empty
+    list. Items without a publish timestamp get the current UTC time.
+    """
+    query = query_override or await _resolve_ticker_name(client, ticker)
+    tokens: list[str] = []
+    for word in re.split(r"[\s.]+", query):
+        lowered = word.lower()
+        if len(word) >= 3 and lowered not in _NAME_STOPWORDS:
+            tokens.append(lowered)
+    try:
+        response = await client.get(
+            YAHOO_NEWS,
+            headers=UA,
+            params={"q": query, "newsCount": count, "quotesCount": 0},
+            timeout=10,
+        )
+        response.raise_for_status()
+        headlines: list[Headline] = []
+        for item in response.json().get("news", []):
+            title = (item.get("title") or "").strip()
+            link = (item.get("link") or "").strip()
+            if not title or not link:
+                continue
+            title_lc = title.lower()
+            if tokens and all(tok not in title_lc for tok in tokens):
+                continue
+            stamp = item.get("providerPublishTime")
+            if stamp:
+                when = datetime.fromtimestamp(stamp, timezone.utc)
+            else:
+                when = datetime.now(timezone.utc)
+            headlines.append(
+                Headline(when, f"Yahoo:{ticker}", "ticker", title, link)
+            )
+        return headlines
+    except Exception:
+        return []
+
+
+def dedupe(headlines: list[Headline]) -> list[Headline]:
+    """Drop repeated headlines, keeping the first occurrence in input order.
+
+    A headline counts as a repeat when its URL or its normalised-title
+    fingerprint equals that of a headline already kept — URL first, then
+    title, same logic as the prototype. Skipped headlines do not register
+    their URL or fingerprint.
+    """
+    urls_seen: set[str] = set()
+    fingerprints_seen: set[str] = set()
+    unique: list[Headline] = []
+    for item in headlines:
+        is_repeat = item.url in urls_seen or item.fingerprint in fingerprints_seen
+        if not is_repeat:
+            urls_seen.add(item.url)
+            fingerprints_seen.add(item.fingerprint)
+            unique.append(item)
+    return unique
+
+
+def matches_any(text: str, keywords: list[str]) -> bool:
+    """Return True if any keyword occurs as a substring of `text`.
+
+    Only `text` is lower-cased; keywords are compared as given, so they
+    must already be lower-case to match.
+    """
+    haystack = text.lower()
+    for keyword in keywords:
+        if keyword in haystack:
+            return True
+    return False