initial commit — cassandra v0.1
Containerised macro-strategy dashboard: 4-panel web UI (indicators, portfolio, flash news, AI strategic log), MariaDB store, hourly ingestion jobs, OpenRouter-backed AI analysis. Ports the four prototype scripts in the parent dir (market_pulse, flash_news, trading212, strategic_log) into async services backed by a persistent DB and served via FastAPI + Jinja2 + HTMX. APScheduler runs as a separate compose service for crash-safety and easier restarts. Portfolio composition + position names come live from Trading 212; news per-ticker headlines reuse those names. Tone (NOVICE/INTERMEDIATE/PRO) and analysis style (DRY/SPECULATIVE) are env-configurable and stored on each log row so historical entries show what produced them. Default model is deepseek/deepseek-v4-flash (overridable via env). Light/dark theme toggle, sans-serif for prose surfaces, monospace for data. Bearer-token auth, OpenRouter monthly cost cap, RSS feeds auto-disabled on consecutive failures. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
commit
a10409c02b
61 changed files with 4890 additions and 0 deletions
167
app/services/news.py
Normal file
167
app/services/news.py
Normal file
|
|
@ -0,0 +1,167 @@
|
|||
"""RSS feed aggregator + Yahoo per-ticker news.
|
||||
|
||||
Ported from /home/gg/ownCloud/Family/Finances/Wealth/flash_news.py — same
|
||||
parsing, dedupe, and ticker-name resolution logic, async HTTP via httpx.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import hashlib
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from email.utils import parsedate_to_datetime
|
||||
from xml.etree import ElementTree as ET
|
||||
|
||||
import httpx
|
||||
|
||||
|
||||
# Header set passed as `headers=` on every HTTP request made by this module.
UA = {
    "User-Agent": "Mozilla/5.0 (cassandra) Python/httpx",
}

# ElementTree-style "{namespace}" prefixes for Atom and Dublin Core tags.
ATOM_NS = "{http://www.w3.org/2005/Atom}"
DC_NS = "{http://purl.org/dc/elements/1.1/}"

# Yahoo Finance endpoints: news search and per-symbol chart metadata.
YAHOO_NEWS = "https://query1.finance.yahoo.com/v1/finance/search"
YAHOO_CHART = "https://query1.finance.yahoo.com/v8/finance/chart/{symbol}"

# Words dropped from an instrument name before it is used for the
# title-overlap filter in fetch_yahoo_news.
_NAME_STOPWORDS = {
    "plc",
    "corp",
    "inc",
    "ltd",
    "fund",
    "etf",
    "ucits",
    "class",
    "shares",
    "trust",
    "the",
    "and",
    "of",
}
|
||||
|
||||
|
||||
@dataclass
class Headline:
    """One news headline produced by the feed parser or the Yahoo search."""

    when: datetime  # tz-aware UTC
    source: str  # feed name, or "Yahoo:<ticker>" for per-ticker results
    category: str  # feed category, or "ticker" for per-ticker results
    title: str
    url: str

    @property
    def fingerprint(self) -> str:
        """Hex SHA-1 of the normalised title (used as the DB UNIQUE key).

        Normalising means lower-casing and collapsing every run of
        whitespace to a single space, so titles that differ only in case
        or spacing share a fingerprint.
        """
        words = self.title.lower().split()
        canonical = " ".join(words)
        digest = hashlib.sha1(canonical.encode("utf-8"))
        return digest.hexdigest()
|
||||
|
||||
|
||||
def _parse_date(s: str | None) -> datetime | None:
|
||||
if not s:
|
||||
return None
|
||||
try:
|
||||
return parsedate_to_datetime(s).astimezone(timezone.utc)
|
||||
except (TypeError, ValueError):
|
||||
pass
|
||||
try:
|
||||
return datetime.fromisoformat(s.replace("Z", "+00:00")).astimezone(timezone.utc)
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
|
||||
def _atom_entry_link(entry: ET.Element) -> str:
    """Return the article URL of an Atom entry, or "" when it has none.

    An entry may carry several <link> elements (self, enclosure, ...).
    The one with rel="alternate" -- which is also the meaning of a missing
    rel attribute -- points at the article itself, so it wins. Otherwise
    the first link that has an href is used.
    """
    fallback = ""
    for link_el in entry.findall(f"{ATOM_NS}link"):
        href = (link_el.get("href") or "").strip()
        if not href:
            continue
        if link_el.get("rel", "alternate") == "alternate":
            return href
        if not fallback:
            fallback = href
    return fallback


def parse_feed(name: str, category: str, xml_bytes: bytes) -> list[Headline]:
    """Parse an RSS or Atom document into headlines.

    Args:
        name: Feed name, stored as each headline's ``source``.
        category: Feed category, stored on each headline.
        xml_bytes: Raw response body.

    Returns:
        One Headline per item/entry that has both a title and a link.
        Items without a parseable date are stamped with the current UTC
        time. Malformed XML yields an empty list.
    """
    # NOTE(security): xml_bytes comes from remote servers and the stdlib
    # ElementTree parser is not hardened against maliciously constructed
    # XML. Consider defusedxml if feeds from untrusted origins are added.
    try:
        root = ET.fromstring(xml_bytes)
    except ET.ParseError:
        return []

    out: list[Headline] = []

    rss_items = root.findall(".//item")
    if rss_items:
        for item in rss_items:
            title = (item.findtext("title") or "").strip()
            link = (item.findtext("link") or "").strip()
            if not (title and link):
                continue
            pub = item.findtext("pubDate") or item.findtext(f"{DC_NS}date")
            when = _parse_date(pub) or datetime.now(timezone.utc)
            out.append(Headline(when, name, category, title, link))
        return out

    # No RSS <item> elements anywhere: treat the document as Atom.
    for entry in root.findall(f".//{ATOM_NS}entry"):
        title = (entry.findtext(f"{ATOM_NS}title") or "").strip()
        link = _atom_entry_link(entry)
        if not (title and link):
            continue
        pub = (
            entry.findtext(f"{ATOM_NS}published")
            or entry.findtext(f"{ATOM_NS}updated")
        )
        when = _parse_date(pub) or datetime.now(timezone.utc)
        out.append(Headline(when, name, category, title, link))
    return out
|
||||
|
||||
|
||||
async def fetch_feed(
    client: httpx.AsyncClient, name: str, category: str, url: str
) -> list[Headline]:
    """Download one feed and parse it into headlines.

    Failures are NOT swallowed here: any error raised by ``client.get``
    and the ``httpx.HTTPStatusError`` raised by ``raise_for_status()`` for
    a 4xx/5xx response propagate to the caller, which is expected to log
    them. An empty list is returned only when the response body itself
    yields no headlines (``parse_feed`` returns ``[]`` for malformed XML).

    Args:
        client: Shared async HTTP client.
        name: Feed name, stored as each headline's ``source``.
        category: Feed category, stored on each headline.
        url: Feed URL to fetch.
    """
    response = await client.get(url, headers=UA, timeout=12)
    response.raise_for_status()
    return parse_feed(name, category, response.content)
|
||||
|
||||
|
||||
async def _resolve_ticker_name(client: httpx.AsyncClient, ticker: str) -> str:
    """Resolve a ticker to its company name via Yahoo chart metadata.

    Searching news by company name hits headlines that actually mention
    the company, rather than ones matching the literal ticker string.

    Returns ``longName`` if present and non-empty, else ``shortName``,
    else the ticker itself. Best-effort: any failure (network, HTTP
    status, unexpected payload shape) also returns the ticker unchanged.
    """
    try:
        response = await client.get(
            YAHOO_CHART.format(symbol=ticker),
            params={"interval": "1d", "range": "5d"},
            headers=UA,
            timeout=8,
        )
        response.raise_for_status()
        payload = response.json()
        meta = payload["chart"]["result"][0]["meta"]
        for key in ("longName", "shortName"):
            value = meta.get(key)
            if value:
                return value
        return ticker
    except Exception:
        return ticker
|
||||
|
||||
|
||||
async def fetch_yahoo_news(
    client: httpx.AsyncClient,
    ticker: str,
    count: int = 10,
    query_override: str | None = None,
) -> list[Headline]:
    """Fetch Yahoo search headlines for one ticker, filtered by name overlap.

    A headline is kept only if its title contains at least one significant
    token of the query (length >= 3 and not a stopword). Because of this
    guard, niche UCITS ETFs return an empty list rather than the generic
    firehose. When the query has no significant tokens, nothing is
    filtered.

    Args:
        client: Shared async HTTP client.
        ticker: Symbol; used in each headline's source as "Yahoo:<ticker>".
        count: Maximum number of news items requested from Yahoo.
        query_override: Search text to use instead of the resolved name
            (e.g. a name already fetched from Trading 212 instruments);
            providing it skips the Yahoo chart-meta round-trip.

    Returns:
        Matching headlines. Best-effort: any request or payload failure
        yields an empty list.
    """
    query = query_override or await _resolve_ticker_name(client, ticker)
    tokens = [
        part.lower()
        for part in re.split(r"[\s.]+", query)
        if len(part) >= 3 and part.lower() not in _NAME_STOPWORDS
    ]
    try:
        response = await client.get(
            YAHOO_NEWS,
            params={"q": query, "newsCount": count, "quotesCount": 0},
            headers=UA,
            timeout=10,
        )
        response.raise_for_status()
        items = response.json().get("news", [])
        out: list[Headline] = []
        for item in items:
            # One malformed entry must not discard the whole batch.
            if not isinstance(item, dict):
                continue
            title = (item.get("title") or "").strip()
            link = (item.get("link") or "").strip()
            if not (title and link):
                continue
            lowered = title.lower()
            if tokens and not any(tok in lowered for tok in tokens):
                continue
            when = None
            ts = item.get("providerPublishTime")
            if ts:
                try:
                    when = datetime.fromtimestamp(ts, timezone.utc)
                except (TypeError, ValueError, OverflowError, OSError):
                    # Unusable timestamp: fall back to "now" below.
                    when = None
            if when is None:
                when = datetime.now(timezone.utc)
            out.append(Headline(when, f"Yahoo:{ticker}", "ticker", title, link))
        return out
    except Exception:
        # Deliberate best-effort: per-ticker news is optional.
        return []
|
||||
|
||||
|
||||
def dedupe(headlines: list[Headline]) -> list[Headline]:
    """Drop repeated headlines, keeping the first occurrence in input order.

    A headline is a repeat when its URL or its title fingerprint was
    already seen on a previously *kept* headline; the URL is checked
    first. Dropped headlines do not contribute their URL or fingerprint
    to the seen-sets.
    """
    kept: list[Headline] = []
    known_urls: set[str] = set()
    known_fps: set[str] = set()
    for item in headlines:
        if item.url in known_urls:
            continue
        if item.fingerprint in known_fps:
            continue
        known_urls.add(item.url)
        known_fps.add(item.fingerprint)
        kept.append(item)
    return kept
|
||||
|
||||
|
||||
def matches_any(text: str, keywords: list[str]) -> bool:
    """Return True if any keyword occurs in ``text``, ignoring case.

    Matching is by plain substring. Both sides are lower-cased, so a
    keyword written with capitals (e.g. "FED") still matches; previously
    only the text was lower-cased and such keywords could never match.
    An empty keyword list yields False.
    """
    haystack = text.lower()
    return any(kw.lower() in haystack for kw in keywords)
|
||||
Loading…
Add table
Add a link
Reference in a new issue