- Move news_job from hourly to 3x/hour (cron 10,30,50), with a CadencePolicy gate that throttles to active hours (07-21 UTC weekdays at 20 min), off-hours (3 h), weekends (6 h). Keeps the daytime feed fresh without spamming RSS sources overnight. - Tag each headline on ingestion via DeepSeek (BATCH_SIZE=25, max_tokens=4000, json.JSONDecoder().raw_decode + per-row regex recovery for resilient parsing). Vocabulary: 16 tags including new EU / USA / AI / Conflict. NULL tags are picked up automatically on the next news_job run, so back-tagging is implicit rather than a separate migration step. - Tag UI: pill bar above the feed with off → include → exclude cycle on click; shift-click jumps straight to exclude. State persists in localStorage and is injected into /api/news requests via htmx:configRequest. Per-row chips sit to the right of the headline (new 5-column grid: age | source | title | tags | UTC) so vertical density stays high. - Strategic log header bug: model was hallucinating "(Updated 21:30 UTC)" in future tense. Bumped PROMPT_VERSION 6→7, added explicit ban on time-of-day clauses, and supply the actual current UTC time in the user prompt so the model has no need to invent one. Migration 0012 adds headlines.tags (JSON, nullable). Tests cover vocabulary integrity, validation/normalisation, and the JSON-recovery parser (17 tests).
242 lines
12 KiB
Python
242 lines
12 KiB
Python
"""SQLAlchemy models for Cassandra.
|
|
|
|
Schema rationale lives in /home/gg/.claude/plans/ok-i-think-this-tidy-lake.md.
|
|
All datetimes are tz-aware UTC (see app.db.utcnow).
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
from datetime import datetime, date
|
|
|
|
from sqlalchemy import (
|
|
JSON,
|
|
BigInteger,
|
|
Boolean,
|
|
Date,
|
|
DateTime,
|
|
Float,
|
|
ForeignKey,
|
|
Index,
|
|
Integer,
|
|
String,
|
|
Text,
|
|
UniqueConstraint,
|
|
)
|
|
from sqlalchemy.orm import Mapped, mapped_column, relationship
|
|
|
|
from app.db import Base, utcnow
|
|
|
|
|
|
class Quote(Base):
    """One stored quote observation for a symbol, as reported by a source.

    Rows use a surrogate ``id`` key; ``(symbol, fetched_at)`` is indexed but
    not unique. ``price`` and ``error`` are both nullable.
    """

    __tablename__ = "quotes"

    id: Mapped[int] = mapped_column(
        BigInteger,
        primary_key=True,
        autoincrement=True,
    )
    symbol: Mapped[str] = mapped_column(String(128), nullable=False)
    source: Mapped[str] = mapped_column(String(32), nullable=False)
    label: Mapped[str] = mapped_column(String(128), default="")
    group_name: Mapped[str] = mapped_column(String(64), nullable=False)
    price: Mapped[float | None] = mapped_column(Float)
    currency: Mapped[str | None] = mapped_column(String(8))
    # Date string exactly as handed over by the provider (kept as text).
    as_of: Mapped[str | None] = mapped_column(String(16))
    # Change figures keyed by horizon, e.g. {"1d": x, "1m": y, ...}.
    changes: Mapped[dict | None] = mapped_column(JSON)
    error: Mapped[str | None] = mapped_column(String(255))
    fetched_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=utcnow,
    )

    __table_args__ = (
        Index("ix_quotes_symbol_fetched", "symbol", "fetched_at"),
        Index("ix_quotes_group", "group_name"),
    )
|
|
|
|
|
|
class QuoteDaily(Base):
    """Per-day rollup of quotes, used as the sparkline source.

    The primary key is the composite ``(symbol, date)``.
    """

    __tablename__ = "quotes_daily"

    # NOTE(review): quotes.symbol is String(128) while this column is
    # String(64) — confirm longer symbols can never reach the rollup.
    symbol: Mapped[str] = mapped_column(String(64), primary_key=True)
    # NOTE(review): the attribute name shadows the imported ``date`` type
    # inside the class body; the annotation only resolves because
    # annotations are lazy strings here — confirm against the SQLAlchemy
    # version in use.
    date: Mapped[date] = mapped_column(Date, primary_key=True)
    close: Mapped[float | None] = mapped_column(Float)
    high: Mapped[float | None] = mapped_column(Float)
    low: Mapped[float | None] = mapped_column(Float)
    source: Mapped[str] = mapped_column(String(32))
|
|
|
|
|
|
class Headline(Base):
    """A news headline row, unique per ``fingerprint``.

    Indexed by ``published_at`` alone and by ``(category, published_at)``.
    """

    __tablename__ = "headlines"

    id: Mapped[int] = mapped_column(
        BigInteger,
        primary_key=True,
        autoincrement=True,
    )
    source: Mapped[str] = mapped_column(String(64), nullable=False)
    category: Mapped[str] = mapped_column(String(32), nullable=False)
    title: Mapped[str] = mapped_column(String(512), nullable=False)
    url: Mapped[str] = mapped_column(String(1024), nullable=False)
    published_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
    )
    fetched_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=utcnow,
    )
    # sha1 of the normalised title.
    fingerprint: Mapped[str] = mapped_column(String(40), nullable=False)
    # Semantic content tags produced by app.services.news_tagging. NULL means
    # the row has not been tagged yet and the next news_job run picks it up.
    # Every entry is one of the values in news_tagging.TAG_VOCABULARY.
    tags: Mapped[list[str] | None] = mapped_column(JSON, nullable=True)

    __table_args__ = (
        UniqueConstraint("fingerprint", name="uq_headlines_fingerprint"),
        Index("ix_headlines_published", "published_at"),
        Index("ix_headlines_category_published", "category", "published_at"),
    )
|
|
|
|
|
|
class Feed(Base):
    """Persisted state of a feed.

    Bootstrapped from default.toml on first startup. A feed is unique per
    ``(category, name)``.
    """

    __tablename__ = "feeds"

    id: Mapped[int] = mapped_column(
        Integer,
        primary_key=True,
        autoincrement=True,
    )
    category: Mapped[str] = mapped_column(String(32), nullable=False)
    name: Mapped[str] = mapped_column(String(64), nullable=False)
    url: Mapped[str] = mapped_column(String(1024), nullable=False)
    enabled: Mapped[bool] = mapped_column(Boolean, default=True)
    consecutive_failures: Mapped[int] = mapped_column(Integer, default=0)
    last_success_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True)
    )

    __table_args__ = (
        UniqueConstraint("category", "name", name="uq_feeds_cat_name"),
    )
|
|
|
|
|
|
class StrategicLog(Base):
    """A generated strategic-log entry.

    Stores the text in ``content`` together with the model name, prompt
    version, tone/analysis settings and token/cost figures recorded with it.
    ``generated_at`` carries a single-column index.
    """

    __tablename__ = "strategic_logs"

    id: Mapped[int] = mapped_column(
        BigInteger,
        primary_key=True,
        autoincrement=True,
    )
    generated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=utcnow,
        index=True,
    )
    model: Mapped[str] = mapped_column(String(64), nullable=False)
    anchor_date: Mapped[str | None] = mapped_column(String(16))
    prompt_version: Mapped[int] = mapped_column(Integer, default=1)
    # One of NOVICE | INTERMEDIATE | PRO.
    tone: Mapped[str | None] = mapped_column(String(16))
    # One of DRY | SPECULATIVE.
    analysis: Mapped[str | None] = mapped_column(String(16))
    content: Mapped[str] = mapped_column(Text, nullable=False)
    prompt_tokens: Mapped[int | None] = mapped_column(Integer)
    completion_tokens: Mapped[int | None] = mapped_column(Integer)
    cost_usd: Mapped[float | None] = mapped_column(Float)
|
|
|
|
|
|
class IndicatorSummary(Base):
    """Short AI-generated read for a single indicator group.

    Regenerated hourly; the dashboard renders the latest row per
    ``group_name``.
    """

    __tablename__ = "indicator_summaries"

    id: Mapped[int] = mapped_column(
        BigInteger,
        primary_key=True,
        autoincrement=True,
    )
    group_name: Mapped[str] = mapped_column(String(64), nullable=False)
    generated_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=utcnow,
    )
    model: Mapped[str] = mapped_column(String(64), nullable=False)
    tone: Mapped[str | None] = mapped_column(String(16))
    analysis: Mapped[str | None] = mapped_column(String(16))
    prompt_version: Mapped[int] = mapped_column(Integer, default=1)
    content: Mapped[str] = mapped_column(Text, nullable=False)
    prompt_tokens: Mapped[int | None] = mapped_column(Integer)
    completion_tokens: Mapped[int | None] = mapped_column(Integer)
    cost_usd: Mapped[float | None] = mapped_column(Float)

    __table_args__ = (
        Index("ix_indsumm_group_generated", "group_name", "generated_at"),
    )
|
|
|
|
|
|
class AICall(Base):
    """Cost ledger for OpenRouter calls; input to the monthly cap check.

    ``called_at`` carries a single-column index.
    """

    __tablename__ = "ai_calls"

    id: Mapped[int] = mapped_column(
        BigInteger,
        primary_key=True,
        autoincrement=True,
    )
    called_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=utcnow,
        index=True,
    )
    model: Mapped[str] = mapped_column(String(64), nullable=False)
    prompt_tokens: Mapped[int | None] = mapped_column(Integer)
    completion_tokens: Mapped[int | None] = mapped_column(Integer)
    cost_usd: Mapped[float | None] = mapped_column(Float)
    status: Mapped[str] = mapped_column(String(16), default="ok")
    error: Mapped[str | None] = mapped_column(String(512))
|
|
|
|
|
|
# Portfolio / PortfolioSnapshot / Position removed in Phase G —
# holdings live in the browser, the server stores only the anonymous
# ticker universe + public market data.
|
|
|
|
|
|
class User(Base):
    """A user account, unique per ``email``.

    Authentication is e-mail-only through one-time codes (see EmailOTP);
    there are no passwords. Holding an active session cookie means the user
    proved control of `email` when the session was created, which is why no
    separate `email_verified` flag exists — it would be redundant.
    """

    __tablename__ = "users"

    id: Mapped[int] = mapped_column(
        Integer,
        primary_key=True,
        autoincrement=True,
    )
    email: Mapped[str] = mapped_column(String(255), nullable=False)
    # One of free | paid | enterprise.
    tier: Mapped[str] = mapped_column(String(16), default="free")
    settings_json: Mapped[dict | None] = mapped_column(JSON)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=utcnow,
    )
    last_login_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True)
    )

    __table_args__ = (
        UniqueConstraint("email", name="uq_users_email"),
    )
|
|
|
|
|
|
class EmailOTP(Base):
    """One-time code issued for email verification.

    The plaintext 6-digit code goes out in the email; the table keeps an
    argon2 hash plus expiry, attempt count and a ``used_at`` timestamp, so a
    single code can be neither reused nor brute-forced.
    """

    __tablename__ = "email_otps"

    id: Mapped[int] = mapped_column(
        BigInteger,
        primary_key=True,
        autoincrement=True,
    )
    email: Mapped[str] = mapped_column(String(255), nullable=False)
    code_hash: Mapped[str] = mapped_column(String(255), nullable=False)
    created_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=utcnow,
    )
    expires_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        nullable=False,
    )
    attempts: Mapped[int] = mapped_column(Integer, default=0)
    used_at: Mapped[datetime | None] = mapped_column(DateTime(timezone=True))
    purpose: Mapped[str] = mapped_column(String(16), default="signup")

    __table_args__ = (
        Index("ix_otps_email_created", "email", "created_at"),
    )
|
|
|
|
|
|
class InstrumentMap(Base):
    """Mapping from T212 tickers/shortnames to Yahoo Finance tickers.

    It exists so prices can be refreshed via Yahoo after a user uploads a
    T212 pie CSV.

    Rows are synced periodically from T212's /equity/metadata/instruments
    endpoint using the admin's read-only API key; each row is one T212
    listing. Several rows may share a shortName (e.g. SHEL on LSE in GBX vs
    SHEL on NYSE in USD); the resolver picks the right one per user.
    """

    __tablename__ = "instrument_map"

    id: Mapped[int] = mapped_column(
        BigInteger,
        primary_key=True,
        autoincrement=True,
    )
    t212_ticker: Mapped[str] = mapped_column(String(64), nullable=False)
    t212_shortname: Mapped[str] = mapped_column(String(32), nullable=False)
    yahoo_ticker: Mapped[str | None] = mapped_column(String(32))
    name: Mapped[str] = mapped_column(String(128), nullable=False)
    currency: Mapped[str | None] = mapped_column(String(8))
    isin: Mapped[str | None] = mapped_column(String(16))
    instrument_type: Mapped[str | None] = mapped_column(String(16))
    manual: Mapped[bool] = mapped_column(Boolean, default=False)
    last_verified_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=utcnow,
    )

    __table_args__ = (
        UniqueConstraint("t212_ticker", name="uq_imap_t212_ticker"),
        Index("ix_imap_shortname", "t212_shortname"),
        Index("ix_imap_isin", "isin"),
    )
|
|
|
|
|
|
class TickerUniverse(Base):
    """The set of public tickers Cassandra currently tracks.

    It is populated as the union of all users' holdings, *without user
    attribution*: once a ticker is in the universe, its row carries no
    signal as to who put it there. The /api/universe endpoint returns the
    entire set (gzipped) to every authenticated client, so the request body
    itself doesn't leak which tickers belong to which user.

    Eviction policy: passive aging. ``last_referenced_at`` is bumped whenever
    the ticker appears in /api/portfolio/parse or /api/analyze. A nightly
    cron prunes rows older than UNIVERSE_EVICTION_TTL (60 days).
    """

    __tablename__ = "ticker_universe"

    yahoo_ticker: Mapped[str] = mapped_column(String(32), primary_key=True)
    currency: Mapped[str | None] = mapped_column(String(8))
    first_seen_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=utcnow,
    )
    last_referenced_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=utcnow,
    )

    __table_args__ = (
        Index("ix_universe_last_ref", "last_referenced_at"),
    )
|
|
|
|
|
|
class JobRun(Base):
    """A single scheduled-job invocation (one row each).

    Powers /api/health and the ops footer. Indexed by ``(name, started_at)``.
    """

    __tablename__ = "job_runs"

    id: Mapped[int] = mapped_column(
        BigInteger,
        primary_key=True,
        autoincrement=True,
    )
    name: Mapped[str] = mapped_column(String(64), nullable=False)
    started_at: Mapped[datetime] = mapped_column(
        DateTime(timezone=True),
        default=utcnow,
    )
    finished_at: Mapped[datetime | None] = mapped_column(
        DateTime(timezone=True)
    )
    # One of running | success | failed.
    status: Mapped[str] = mapped_column(String(16), default="running")
    error: Mapped[str | None] = mapped_column(Text)
    items_written: Mapped[int | None] = mapped_column(Integer)

    __table_args__ = (
        Index("ix_jobruns_name_started", "name", "started_at"),
    )
|