read.markets/app/models.py
Giorgio Gilestro 2013bfa8cc news: auto-tag headlines + market-aware cadence + filter UI
- Move news_job from hourly to 3x/hour (cron 10,30,50), with a CadencePolicy
  gate that throttles to active hours (07-21 UTC weekdays at 20 min), off-hours
  (3 h), weekends (6 h). Keeps the daytime feed fresh without spamming RSS
  sources overnight.
- Tag each headline on ingestion via DeepSeek (BATCH_SIZE=25, max_tokens=4000,
  json.JSONDecoder().raw_decode + per-row regex recovery for resilient parsing).
  Vocabulary: 16 tags including new EU / USA / AI / Conflict. NULL tags are
  picked up automatically on the next news_job run, so back-tagging is implicit
  rather than a separate migration step.
- Tag UI: pill bar above the feed with off → include → exclude cycle on click;
  shift-click jumps straight to exclude. State persists in localStorage and is
  injected into /api/news requests via htmx:configRequest. Per-row chips sit to
  the right of the headline (new 5-column grid: age | source | title | tags |
  UTC) so vertical density stays high.
- Strategic log header bug: model was hallucinating "(Updated 21:30 UTC)" in
  future tense. Bumped PROMPT_VERSION 6→7, added explicit ban on time-of-day
  clauses, and supply the actual current UTC time in the user prompt so the
  model has no need to invent one.

Migration 0012 adds headlines.tags (JSON, nullable). Tests cover vocabulary
integrity, validation/normalisation, and the JSON-recovery parser (17 tests).
2026-05-21 23:25:03 +01:00

242 lines
12 KiB
Python

"""SQLAlchemy models for Cassandra.
Schema rationale lives in /home/gg/.claude/plans/ok-i-think-this-tidy-lake.md.
All datetimes are tz-aware UTC (see app.db.utcnow).
"""
from __future__ import annotations
from datetime import datetime, date
from sqlalchemy import (
JSON,
BigInteger,
Boolean,
Date,
DateTime,
Float,
ForeignKey,
Index,
Integer,
String,
Text,
UniqueConstraint,
)
from sqlalchemy.orm import Mapped, mapped_column, relationship
from app.db import Base, utcnow
class Quote(Base):
    """Row of the ``quotes`` table.

    Holds a symbol, the source it came from, its display label and group,
    the price data, and the timestamp at which the row was fetched.
    Indexed on ``(symbol, fetched_at)`` and on ``group_name``.
    """

    __tablename__ = "quotes"
    __table_args__ = (
        Index("ix_quotes_symbol_fetched", "symbol", "fetched_at"),
        Index("ix_quotes_group", "group_name"),
    )

    # --- key ---------------------------------------------------------------
    id: Mapped[int] = mapped_column(
        BigInteger,
        primary_key=True,
        autoincrement=True,
    )

    # --- identity / grouping -----------------------------------------------
    symbol: Mapped[str] = mapped_column(String(128), nullable=False)
    source: Mapped[str] = mapped_column(String(32), nullable=False)
    label: Mapped[str] = mapped_column(String(128), default="")
    group_name: Mapped[str] = mapped_column(String(64), nullable=False)

    # --- payload -----------------------------------------------------------
    price: Mapped[float | None] = mapped_column(Float)
    currency: Mapped[str | None] = mapped_column(String(8))
    # Date string exactly as reported by the provider.
    as_of: Mapped[str | None] = mapped_column(String(16))
    # Mapping of period -> change, e.g. {"1d": x, "1m": y, ...}.
    changes: Mapped[dict | None] = mapped_column(JSON)
    error: Mapped[str | None] = mapped_column(String(255))

    # --- bookkeeping -------------------------------------------------------
    fetched_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=utcnow,
    )
class QuoteDaily(Base):
    """Per-day rollup of quotes; the data source for sparklines.

    The primary key is the composite ``(symbol, date)``.
    """

    __tablename__ = "quotes_daily"

    # --- composite primary key ---------------------------------------------
    # NOTE(review): ``symbol`` is String(64) here but String(128) on Quote —
    # confirm whether the narrower width is intentional.
    symbol: Mapped[str] = mapped_column(String(64), primary_key=True)
    # NOTE(review): this attribute is named ``date`` and so shadows the
    # ``datetime.date`` import inside the class namespace; the annotation is
    # presumably resolved against module globals by SQLAlchemy — confirm
    # against the pinned SQLAlchemy version before renaming or refactoring.
    date: Mapped[date] = mapped_column(Date, primary_key=True)

    # --- daily figures -----------------------------------------------------
    close: Mapped[float | None] = mapped_column(Float)
    high: Mapped[float | None] = mapped_column(Float)
    low: Mapped[float | None] = mapped_column(Float)

    source: Mapped[str] = mapped_column(String(32))
class Headline(Base):
    """Row of the ``headlines`` table.

    Rows are unique on ``fingerprint`` and indexed on ``published_at`` and
    on ``(category, published_at)``.
    """

    __tablename__ = "headlines"
    __table_args__ = (
        UniqueConstraint("fingerprint", name="uq_headlines_fingerprint"),
        Index("ix_headlines_published", "published_at"),
        Index("ix_headlines_category_published", "category", "published_at"),
    )

    # --- key ---------------------------------------------------------------
    id: Mapped[int] = mapped_column(
        BigInteger,
        primary_key=True,
        autoincrement=True,
    )

    # --- article identity --------------------------------------------------
    source: Mapped[str] = mapped_column(String(64), nullable=False)
    category: Mapped[str] = mapped_column(String(32), nullable=False)
    title: Mapped[str] = mapped_column(String(512), nullable=False)
    url: Mapped[str] = mapped_column(String(1024), nullable=False)

    # --- timestamps --------------------------------------------------------
    published_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
    )
    fetched_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=utcnow,
    )

    # sha1 of the normalised title (40 hex characters).
    fingerprint: Mapped[str] = mapped_column(String(40), nullable=False)

    # Semantic content tags assigned by app.services.news_tagging.
    # NULL means "not tagged yet": the next news_job run picks the row up.
    # Every entry is one of the values in news_tagging.TAG_VOCABULARY.
    tags: Mapped[list[str] | None] = mapped_column(JSON, nullable=True)
class Feed(Base):
    """Feed state kept in the database.

    Bootstrapped from default.toml the first time the application starts.
    A feed is unique on ``(category, name)``.
    """

    __tablename__ = "feeds"
    __table_args__ = (
        UniqueConstraint("category", "name", name="uq_feeds_cat_name"),
    )

    # --- key ---------------------------------------------------------------
    id: Mapped[int] = mapped_column(
        Integer,
        primary_key=True,
        autoincrement=True,
    )

    # --- definition --------------------------------------------------------
    category: Mapped[str] = mapped_column(String(32), nullable=False)
    name: Mapped[str] = mapped_column(String(64), nullable=False)
    url: Mapped[str] = mapped_column(String(1024), nullable=False)

    # --- runtime state -----------------------------------------------------
    enabled: Mapped[bool] = mapped_column(Boolean, default=True)
    consecutive_failures: Mapped[int] = mapped_column(Integer, default=0)
    last_success_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True)
    )
class StrategicLog(Base):
    """Row of the ``strategic_logs`` table.

    Stores generated text (``content``) together with the model name, the
    prompt version, the tone/analysis settings, and token/cost figures.
    """

    __tablename__ = "strategic_logs"

    # --- key ---------------------------------------------------------------
    id: Mapped[int] = mapped_column(
        BigInteger,
        primary_key=True,
        autoincrement=True,
    )

    # --- generation metadata -----------------------------------------------
    generated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=utcnow,
        index=True,
    )
    model: Mapped[str] = mapped_column(String(64), nullable=False)
    anchor_date: Mapped[str | None] = mapped_column(String(16))
    prompt_version: Mapped[int] = mapped_column(Integer, default=1)
    # One of NOVICE | INTERMEDIATE | PRO.
    tone: Mapped[str | None] = mapped_column(String(16))
    # One of DRY | SPECULATIVE.
    analysis: Mapped[str | None] = mapped_column(String(16))

    # --- output ------------------------------------------------------------
    content: Mapped[str] = mapped_column(Text, nullable=False)

    # --- usage accounting --------------------------------------------------
    prompt_tokens: Mapped[int | None] = mapped_column(Integer)
    completion_tokens: Mapped[int | None] = mapped_column(Integer)
    cost_usd: Mapped[float | None] = mapped_column(Float)
class IndicatorSummary(Base):
    """Brief AI-written read of a single indicator group.

    Regenerated hourly; the dashboard renders the most recent row for each
    ``group_name``. Indexed on ``(group_name, generated_at)``.
    """

    __tablename__ = "indicator_summaries"
    __table_args__ = (
        Index("ix_indsumm_group_generated", "group_name", "generated_at"),
    )

    # --- key ---------------------------------------------------------------
    id: Mapped[int] = mapped_column(
        BigInteger,
        primary_key=True,
        autoincrement=True,
    )

    # --- generation metadata -----------------------------------------------
    group_name: Mapped[str] = mapped_column(String(64), nullable=False)
    generated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=utcnow,
    )
    model: Mapped[str] = mapped_column(String(64), nullable=False)
    tone: Mapped[str | None] = mapped_column(String(16))
    analysis: Mapped[str | None] = mapped_column(String(16))
    prompt_version: Mapped[int] = mapped_column(Integer, default=1)

    # --- output ------------------------------------------------------------
    content: Mapped[str] = mapped_column(Text, nullable=False)

    # --- usage accounting --------------------------------------------------
    prompt_tokens: Mapped[int | None] = mapped_column(Integer)
    completion_tokens: Mapped[int | None] = mapped_column(Integer)
    cost_usd: Mapped[float | None] = mapped_column(Float)
class AICall(Base):
    """Ledger of OpenRouter call costs.

    This table is what the monthly cap check reads from.
    """

    __tablename__ = "ai_calls"

    # --- key ---------------------------------------------------------------
    id: Mapped[int] = mapped_column(
        BigInteger,
        primary_key=True,
        autoincrement=True,
    )

    # --- call metadata -----------------------------------------------------
    called_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=utcnow,
        index=True,
    )
    model: Mapped[str] = mapped_column(String(64), nullable=False)

    # --- usage accounting --------------------------------------------------
    prompt_tokens: Mapped[int | None] = mapped_column(Integer)
    completion_tokens: Mapped[int | None] = mapped_column(Integer)
    cost_usd: Mapped[float | None] = mapped_column(Float)

    # --- outcome -----------------------------------------------------------
    status: Mapped[str] = mapped_column(String(16), default="ok")
    error: Mapped[str | None] = mapped_column(String(512))
# Portfolio / PortfolioSnapshot / Position removed in Phase G —
# holdings live in the browser, the server stores only the anonymous
# ticker universe + public market data.
class User(Base):
    """User account record.

    There are no passwords: authentication is by e-mail only, using
    one-time codes (see EmailOTP). Holding an active session cookie means
    the user demonstrated control of ``email`` when the session was
    created, which is why no separate ``email_verified`` flag exists — it
    would be redundant.
    """

    __tablename__ = "users"
    __table_args__ = (
        UniqueConstraint("email", name="uq_users_email"),
    )

    # --- key ---------------------------------------------------------------
    id: Mapped[int] = mapped_column(
        Integer,
        primary_key=True,
        autoincrement=True,
    )

    # --- account -----------------------------------------------------------
    email: Mapped[str] = mapped_column(String(255), nullable=False)
    # One of free | paid | enterprise.
    tier: Mapped[str] = mapped_column(String(16), default="free")
    settings_json: Mapped[dict | None] = mapped_column(JSON)

    # --- timestamps --------------------------------------------------------
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=utcnow,
    )
    last_login_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True)
    )
class EmailOTP(Base):
    """One-time code issued for e-mail verification.

    The plaintext 6-digit code goes out in the e-mail; only an argon2 hash
    is stored here. The expiry, the attempt counter and the ``used_at``
    timestamp exist so that a given code can be neither reused nor
    brute-forced. Indexed on ``(email, created_at)``.
    """

    __tablename__ = "email_otps"
    __table_args__ = (
        Index("ix_otps_email_created", "email", "created_at"),
    )

    # --- key ---------------------------------------------------------------
    id: Mapped[int] = mapped_column(
        BigInteger,
        primary_key=True,
        autoincrement=True,
    )

    # --- code --------------------------------------------------------------
    email: Mapped[str] = mapped_column(String(255), nullable=False)
    code_hash: Mapped[str] = mapped_column(String(255), nullable=False)

    # --- lifecycle ---------------------------------------------------------
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=utcnow,
    )
    expires_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
    )
    attempts: Mapped[int] = mapped_column(Integer, default=0)
    used_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))

    purpose: Mapped[str] = mapped_column(String(16), default="signup")
class InstrumentMap(Base):
    """Translation table from T212 tickers/shortnames to Yahoo Finance tickers.

    It lets prices be refreshed through Yahoo once a user has uploaded a
    T212 pie CSV. Rows are synced periodically from T212's
    /equity/metadata/instruments endpoint using the admin's read-only API
    key, one row per T212 listing.

    Several rows may carry the same shortName (e.g. SHEL on LSE in GBX vs
    SHEL on NYSE in USD); the resolver chooses the right one per user.
    """

    __tablename__ = "instrument_map"
    __table_args__ = (
        UniqueConstraint("t212_ticker", name="uq_imap_t212_ticker"),
        Index("ix_imap_shortname", "t212_shortname"),
        Index("ix_imap_isin", "isin"),
    )

    # --- key ---------------------------------------------------------------
    id: Mapped[int] = mapped_column(
        BigInteger,
        primary_key=True,
        autoincrement=True,
    )

    # --- ticker mapping ----------------------------------------------------
    t212_ticker: Mapped[str] = mapped_column(String(64), nullable=False)
    t212_shortname: Mapped[str] = mapped_column(String(32), nullable=False)
    yahoo_ticker: Mapped[str | None] = mapped_column(String(32))

    # --- instrument details ------------------------------------------------
    name: Mapped[str] = mapped_column(String(128), nullable=False)
    currency: Mapped[str | None] = mapped_column(String(8))
    isin: Mapped[str | None] = mapped_column(String(16))
    instrument_type: Mapped[str | None] = mapped_column(String(16))

    # --- provenance --------------------------------------------------------
    manual: Mapped[bool] = mapped_column(Boolean, default=False)
    last_verified_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=utcnow,
    )
class TickerUniverse(Base):
    """Public tickers that Cassandra currently tracks.

    The table is filled with the union of every user's holdings, *with no
    user attribution*: once a ticker is present, its row gives no hint as
    to who added it. The /api/universe endpoint sends the whole set
    (gzipped) to every authenticated client, so the request body itself
    does not reveal which tickers belong to which user.

    Eviction is by passive aging. ``last_referenced_at`` is bumped each
    time the ticker shows up in /api/portfolio/parse or /api/analyze, and a
    nightly cron prunes rows older than UNIVERSE_EVICTION_TTL (60 days).
    """

    __tablename__ = "ticker_universe"
    __table_args__ = (
        Index("ix_universe_last_ref", "last_referenced_at"),
    )

    # --- key ---------------------------------------------------------------
    yahoo_ticker: Mapped[str] = mapped_column(String(32), primary_key=True)

    currency: Mapped[str | None] = mapped_column(String(8))

    # --- aging -------------------------------------------------------------
    first_seen_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=utcnow,
    )
    last_referenced_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=utcnow,
    )
class JobRun(Base):
    """Record of a single scheduled-job invocation.

    One row is written per invocation; /api/health and the ops footer are
    driven by this table. Indexed on ``(name, started_at)``.
    """

    __tablename__ = "job_runs"
    __table_args__ = (
        Index("ix_jobruns_name_started", "name", "started_at"),
    )

    # --- key ---------------------------------------------------------------
    id: Mapped[int] = mapped_column(
        BigInteger,
        primary_key=True,
        autoincrement=True,
    )

    name: Mapped[str] = mapped_column(String(64), nullable=False)

    # --- timing ------------------------------------------------------------
    started_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=utcnow,
    )
    finished_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True)
    )

    # --- outcome -----------------------------------------------------------
    # One of running | success | failed.
    status: Mapped[str] = mapped_column(String(16), default="running")
    error: Mapped[str | None] = mapped_column(Text)
    items_written: Mapped[int | None] = mapped_column(Integer)