Containerised macro-strategy dashboard: 4-panel web UI (indicators, portfolio, flash news, AI strategic log), MariaDB store, hourly ingestion jobs, OpenRouter-backed AI analysis. Ports the four prototype scripts in the parent dir (market_pulse, flash_news, trading212, strategic_log) into async services backed by a persistent DB and served via FastAPI + Jinja2 + HTMX. APScheduler runs as a separate compose service for crash-safety and easier restarts. Portfolio composition + position names come live from Trading 212; per-ticker news headlines reuse those names. Tone (NOVICE/INTERMEDIATE/PRO) and analysis style (DRY/SPECULATIVE) are env-configurable and stored on each log row so historical entries show what produced them. Default model is deepseek/deepseek-v4-flash (overridable via env). Light/dark theme toggle, sans-serif for prose surfaces, monospace for data. Bearer-token auth, OpenRouter monthly cost cap, RSS feeds auto-disabled on consecutive failures. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
167 lines
5.7 KiB
Python
167 lines
5.7 KiB
Python
"""RSS feed aggregator + Yahoo per-ticker news.
|
|
|
|
Ported from /home/gg/ownCloud/Family/Finances/Wealth/flash_news.py — same
|
|
parsing, dedupe, and ticker-name resolution logic, async HTTP via httpx.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import hashlib
|
|
import re
|
|
from dataclasses import dataclass
|
|
from datetime import datetime, timezone
|
|
from email.utils import parsedate_to_datetime
|
|
from xml.etree import ElementTree as ET
|
|
|
|
import httpx
|
|
|
|
|
|
# User-Agent header attached to every outbound request made by this module.
UA = {"User-Agent": "Mozilla/5.0 (cassandra) Python/httpx"}

# Namespace prefixes in ElementTree's "{uri}tag" notation, used when looking
# up Atom elements and the Dublin Core <dc:date> element.
ATOM_NS = "{http://www.w3.org/2005/Atom}"
DC_NS = "{http://purl.org/dc/elements/1.1/}"

# Yahoo Finance endpoints: the search endpoint is queried for news items, the
# chart endpoint (formatted with a symbol) for instrument name metadata.
YAHOO_NEWS = "https://query1.finance.yahoo.com/v1/finance/search"
YAHOO_CHART = "https://query1.finance.yahoo.com/v8/finance/chart/{symbol}"

# Generic words removed from an instrument name before its remaining words
# are used to decide whether a headline is about that instrument.
_NAME_STOPWORDS = {
    "plc",
    "corp",
    "inc",
    "ltd",
    "fund",
    "etf",
    "ucits",
    "class",
    "shares",
    "trust",
    "the",
    "and",
    "of",
}
|
|
|
|
|
|
@dataclass
class Headline:
    """A single news item collected from a feed or from Yahoo search."""

    when: datetime  # publication time; parsers in this module supply tz-aware UTC
    source: str  # feed name, or "Yahoo:<ticker>" for per-ticker results
    category: str  # feed category, or "ticker" for per-ticker results
    title: str
    url: str

    @property
    def fingerprint(self) -> str:
        """Return the SHA-1 hex digest of the normalised title.

        Normalisation lower-cases the title and collapses every run of
        whitespace to a single space, so titles differing only in case or
        spacing share a fingerprint. Used as the DB UNIQUE key.
        """
        words = self.title.lower().split()
        digest = hashlib.sha1(" ".join(words).encode("utf-8"))
        return digest.hexdigest()
|
|
|
|
|
|
def _parse_date(s: str | None) -> datetime | None:
    """Parse a feed timestamp into a tz-aware UTC datetime.

    Two formats are tried in order: RFC 2822 (as used by RSS ``pubDate``),
    then ISO 8601 (as used by Atom and Dublin Core), where a trailing ``Z``
    is accepted as UTC.

    A timestamp that carries no zone information (an ISO string without an
    offset, or an RFC 2822 date with the ``-0000`` zone) parses to a *naive*
    datetime. Such values are taken to be UTC. Calling ``astimezone()`` on
    them instead would interpret them as host-local time and make the result
    depend on the server's TZ setting.

    Returns ``None`` when ``s`` is empty, matches neither format, or cannot
    be represented in UTC.
    """
    if not s:
        return None
    text = s.strip()

    try:
        parsed = parsedate_to_datetime(text)
    except (TypeError, ValueError):
        # Older Pythons raise TypeError, newer ones ValueError, for input
        # that is not an RFC 2822 date; fall through to ISO 8601.
        try:
            parsed = datetime.fromisoformat(text.replace("Z", "+00:00"))
        except ValueError:
            return None

    if parsed.utcoffset() is None:
        # No zone information available: assume UTC rather than local time.
        return parsed.replace(tzinfo=timezone.utc)
    try:
        return parsed.astimezone(timezone.utc)
    except (OverflowError, ValueError):
        # Dates at the very edge of the supported range cannot be shifted.
        return None
|
|
|
|
|
|
def _atom_entry_link(entry: ET.Element) -> str:
    """Return the href of an Atom entry's article link, or "" if it has none.

    An entry may carry several <link> elements (self, enclosure, replies...).
    The one pointing at the article is rel="alternate", which is also the
    meaning of a <link> with no rel attribute. If no such link exists, the
    first link that has an href is used.
    """
    fallback = ""
    for link_el in entry.findall(f"{ATOM_NS}link"):
        href = (link_el.get("href") or "").strip()
        if not href:
            continue
        if link_el.get("rel", "alternate") == "alternate":
            return href
        fallback = fallback or href
    return fallback


def parse_feed(name: str, category: str, xml_bytes: bytes) -> list[Headline]:
    """Parse an RSS or Atom document into a list of headlines.

    The document is treated as RSS when it contains any un-namespaced
    ``<item>`` element, otherwise as Atom. Entries lacking a title or a link
    are skipped. Entries whose date is missing or unparseable are stamped
    with the current UTC time.

    Args:
        name: Feed name, stored as each headline's ``source``.
        category: Feed category, stored on each headline.
        xml_bytes: Raw response body.

    Returns:
        The parsed headlines in document order; an empty list when the body
        is not well-formed XML or holds no usable entries.
    """
    # NOTE(security): feed bodies are untrusted input and the stdlib XML
    # parser is not hardened against entity-expansion attacks. A hardened
    # parser (e.g. defusedxml) would be the usual mitigation.
    try:
        root = ET.fromstring(xml_bytes)
    except ET.ParseError:
        return []

    out: list[Headline] = []
    rss_items = root.findall(".//item")
    if rss_items:
        for it in rss_items:
            title = (it.findtext("title") or "").strip()
            link = (it.findtext("link") or "").strip()
            # <pubDate> is the RSS 2.0 field; <dc:date> is the fallback.
            pub = it.findtext("pubDate") or it.findtext(f"{DC_NS}date")
            when = _parse_date(pub) or datetime.now(timezone.utc)
            if title and link:
                out.append(Headline(when, name, category, title, link))
    else:
        for entry in root.findall(f".//{ATOM_NS}entry"):
            title = (entry.findtext(f"{ATOM_NS}title") or "").strip()
            link = _atom_entry_link(entry)
            pub = (
                entry.findtext(f"{ATOM_NS}published")
                or entry.findtext(f"{ATOM_NS}updated")
            )
            when = _parse_date(pub) or datetime.now(timezone.utc)
            if title and link:
                out.append(Headline(when, name, category, title, link))
    return out
|
|
|
|
|
|
async def fetch_feed(
    client: httpx.AsyncClient, name: str, category: str, url: str
) -> list[Headline]:
    """Download one feed and parse it into headlines.

    Failures are NOT swallowed here. Whatever ``client.get`` raises (timeouts,
    connection errors) and the ``httpx.HTTPStatusError`` raised by
    ``raise_for_status()`` propagate to the caller, which is responsible for
    catching and logging them. An empty list is returned only when a
    successful response yields no headlines (body is not well-formed XML, or
    it has no usable entries).

    Args:
        client: Shared async HTTP client.
        name: Feed name, stored as each headline's ``source``.
        category: Feed category, stored on each headline.
        url: Feed URL.

    Raises:
        httpx.HTTPError: On transport failure or an error status code.
    """
    # NOTE(review): httpx follows redirects only when enabled on the client
    # or the request; confirm the shared client sets follow_redirects for
    # feeds that have moved.
    response = await client.get(url, headers=UA, timeout=12)
    response.raise_for_status()
    return parse_feed(name, category, response.content)
|
|
|
|
|
|
async def _resolve_ticker_name(client: httpx.AsyncClient, ticker: str) -> str:
    """Return a company name for ``ticker`` from Yahoo's chart metadata.

    Searching news by company name finds headlines that mention the company,
    whereas searching by the raw ticker only matches the literal symbol.

    The lookup is best-effort: ``longName`` is preferred, then ``shortName``,
    and on any failure (network, HTTP status, unexpected JSON shape) the
    ticker itself is returned unchanged.
    """
    try:
        response = await client.get(
            YAHOO_CHART.format(symbol=ticker),
            params={"interval": "1d", "range": "5d"},
            headers=UA,
            timeout=8,
        )
        response.raise_for_status()
        meta = response.json()["chart"]["result"][0]["meta"]
        for key in ("longName", "shortName"):
            found = meta.get(key)
            if found:
                return found
        return ticker
    except Exception:
        # Deliberate catch-all: a missing name must never break ingestion.
        return ticker
|
|
|
|
|
|
def _query_tokens(query: str) -> list[str]:
    """Split an instrument name into lowercase words usable for matching.

    Words are separated on whitespace and dots, stripped of punctuation at
    their edges (so "Tesla, Inc." yields "tesla", not "tesla,"), and dropped
    when shorter than three characters or listed in ``_NAME_STOPWORDS``.
    Punctuation inside a word ("at&t", "coca-cola") is kept.
    """
    edge_punct = ",;:!?()[]{}\"'"
    tokens: list[str] = []
    for raw in re.split(r"[\s.]+", query):
        word = raw.strip(edge_punct).lower()
        if len(word) >= 3 and word not in _NAME_STOPWORDS:
            tokens.append(word)
    return tokens


async def fetch_yahoo_news(
    client: httpx.AsyncClient,
    ticker: str,
    count: int = 10,
    query_override: str | None = None,
) -> list[Headline]:
    """Fetch Yahoo search headlines for one ticker, filtered by name overlap.

    A headline is kept only if its title contains at least one significant
    word of the instrument name. Niche UCITS ETFs therefore return an empty
    list rather than Yahoo's generic firehose. When the name has no
    significant words, no filtering is applied.

    Args:
        client: Shared async HTTP client.
        ticker: Symbol; used in each headline's ``source`` ("Yahoo:<ticker>")
            and, without ``query_override``, to look up the instrument name.
        count: Number of news items requested from Yahoo.
        query_override: Instrument name to search for (e.g. one already
            fetched from Trading 212 instruments); when given, the Yahoo
            chart-meta round-trip is skipped.

    Returns:
        Matching headlines with category "ticker"; an empty list on any
        failure of the search request or of response parsing.
    """
    query = query_override or await _resolve_ticker_name(client, ticker)
    tokens = _query_tokens(query)
    try:
        response = await client.get(
            YAHOO_NEWS,
            params={"q": query, "newsCount": count, "quotesCount": 0},
            headers=UA,
            timeout=10,
        )
        response.raise_for_status()
        items = response.json().get("news", [])
        out: list[Headline] = []
        for it in items:
            title = (it.get("title") or "").strip()
            link = (it.get("link") or "").strip()
            if not (title and link):
                continue
            # Lower-case once per title rather than once per token.
            lowered = title.lower()
            if tokens and not any(tok in lowered for tok in tokens):
                continue
            ts = it.get("providerPublishTime")
            when = (
                datetime.fromtimestamp(ts, timezone.utc) if ts
                else datetime.now(timezone.utc)
            )
            out.append(Headline(when, f"Yahoo:{ticker}", "ticker", title, link))
        return out
    except Exception:
        # Deliberate catch-all: per-ticker news is optional and must never
        # abort the ingestion run.
        return []
|
|
|
|
|
|
def dedupe(headlines: list[Headline]) -> list[Headline]:
    """Remove repeated headlines, keeping the first occurrence of each.

    A headline is a repeat when its URL, or failing that its title
    fingerprint, equals that of a headline already kept. Input order is
    preserved. Same logic as the prototype.
    """
    kept_urls: set[str] = set()
    kept_fingerprints: set[str] = set()
    unique: list[Headline] = []
    for item in headlines:
        if item.url in kept_urls:
            continue
        fp = item.fingerprint
        if fp in kept_fingerprints:
            continue
        kept_urls.add(item.url)
        kept_fingerprints.add(fp)
        unique.append(item)
    return unique
|
|
|
|
|
|
def matches_any(text: str, keywords: list[str]) -> bool:
    """Return True when any keyword occurs as a substring of ``text``.

    Only ``text`` is lower-cased; keywords are compared exactly as given, so
    callers should supply them in lower case.
    """
    haystack = text.lower()
    for keyword in keywords:
        if keyword in haystack:
            return True
    return False
|