- Move news_job from hourly to 3x/hour (cron 10,30,50), with a CadencePolicy gate that throttles to active hours (07-21 UTC weekdays at 20 min), off-hours (3 h), weekends (6 h). Keeps the daytime feed fresh without spamming RSS sources overnight. - Tag each headline on ingestion via DeepSeek (BATCH_SIZE=25, max_tokens=4000, json.JSONDecoder().raw_decode + per-row regex recovery for resilient parsing). Vocabulary: 16 tags including new EU / USA / AI / Conflict. NULL tags are picked up automatically on the next news_job run, so back-tagging is implicit rather than a separate migration step. - Tag UI: pill bar above the feed with off → include → exclude cycle on click; shift-click jumps straight to exclude. State persists in localStorage and is injected into /api/news requests via htmx:configRequest. Per-row chips sit to the right of the headline (new 5-column grid: age | source | title | tags | UTC) so vertical density stays high. - Strategic log header bug: model was hallucinating "(Updated 21:30 UTC)" in future tense. Bumped PROMPT_VERSION 6→7, added explicit ban on time-of-day clauses, and supply the actual current UTC time in the user prompt so the model has no need to invent one. Migration 0012 adds headlines.tags (JSON, nullable). Tests cover vocabulary integrity, validation/normalisation, and the JSON-recovery parser (17 tests).
93 lines
4 KiB
Python
93 lines
4 KiB
Python
"""When should expensive AI jobs fire?
|
|
|
|
Markets matter. The scheduler wakes every hour, but there's no point spending
|
|
OpenRouter tokens at 03:00 UTC on a Sunday when nothing has moved. This module
|
|
encodes a single policy: weekday active hours (LSE open through NYSE close,
|
|
roughly 07:00-21:00 UTC) get the full hourly cadence; off-hours and weekends
|
|
get throttled.
|
|
|
|
Used by ai_log_job and indicator_summary_job (DEFAULT_POLICY) and by the news
job (NEWS_POLICY) to decide whether to run NOW or skip until enough time has
passed since the last successful run. Market / portfolio ingestion jobs keep
running hourly — they're cheap.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass
|
|
from datetime import datetime, timezone
|
|
|
|
|
|
@dataclass(frozen=True)
class CadencePolicy:
    """Decide whether a scheduled job should do work on this cron fire.

    Every timestamp is classified into one of three bands, each with its own
    minimum gap (in hours) since the last successful run:

    * ``active``    - a weekday whose UTC hour falls in ``active_windows``
    * ``off-hours`` - a weekday outside every active window
    * ``weekend``   - Saturday or Sunday (UTC)

    All datetimes are interpreted in UTC: naive values are assumed to already
    be UTC, aware values are converted to UTC before their hour and weekday
    are inspected.
    """

    # Active trading windows in UTC as half-open (start_hour, end_hour)
    # pairs. A timestamp is "active" if its hour falls in ANY listed window.
    # Add or remove tuples to change coverage.
    #
    # LSE opens 08:00 London → 07:00 UTC summer (BST) / 08:00 UTC winter (GMT).
    # NYSE closes 16:00 ET   → 20:00 UTC summer (EDT) / 21:00 UTC winter (EST).
    # Tokyo trades 09:00-15:00 JST → 00:00-06:00 UTC.
    # HK/Shanghai trade 09:30-16:00 local → 01:30-08:00 UTC.
    active_windows: tuple[tuple[int, int], ...] = (
        (7, 21),  # EU/US (LSE open through NYSE close)
        # (0, 8),  # Asia (Tokyo + HK/Shanghai) — uncomment to add
    )
    # Minimum gap between successful runs DURING the active window. The
    # cron may fire more frequently than this — we just skip until enough
    # time has passed since the last success. Default 0 means "run on
    # every cron fire" (the original AI-job behaviour).
    active_gap_h: float = 0.0
    # Minimum gap between successful runs on weekdays outside the active window.
    off_hours_gap_h: float = 4.0
    # Minimum gap between successful runs on Saturday / Sunday.
    weekend_gap_h: float = 12.0

    @staticmethod
    def _as_utc(moment: datetime | None) -> datetime:
        """Return ``moment`` as an aware UTC datetime.

        ``None`` means "now". Naive datetimes are assumed to already be in
        UTC (the DB returns naive values); aware ones are converted so that
        ``.hour`` / ``.weekday()`` are read on the UTC clock.
        """
        if moment is None:
            return datetime.now(timezone.utc)
        if moment.tzinfo is None or moment.utcoffset() is None:
            return moment.replace(tzinfo=timezone.utc)
        return moment.astimezone(timezone.utc)

    def _band(self, now: datetime) -> str:
        """Return the band label for an already-UTC ``now``."""
        if now.weekday() >= 5:  # Saturday / Sunday
            return "weekend"
        return "active" if self.is_active_window(now) else "off-hours"

    def is_active_window(self, now: datetime | None = None) -> bool:
        """Return True when ``now`` is a weekday hour inside an active window.

        ``now`` defaults to the current UTC time. Windows are half-open:
        ``start <= hour < end``.
        """
        now = self._as_utc(now)
        if now.weekday() >= 5:  # Saturday / Sunday
            return False
        return any(start <= now.hour < end for start, end in self.active_windows)

    def min_gap_hours(self, now: datetime | None = None) -> float:
        """Return the minimum gap (hours) that applies at ``now``.

        The weekend gap takes precedence, then the active-window gap, then
        the off-hours gap. ``now`` defaults to the current UTC time.
        """
        now = self._as_utc(now)
        if now.weekday() >= 5:
            return self.weekend_gap_h
        if self.is_active_window(now):
            return self.active_gap_h
        return self.off_hours_gap_h

    def should_run(
        self,
        last_success_at: datetime | None,
        now: datetime | None = None,
    ) -> tuple[bool, str]:
        """Returns (should_run, reason). The reason is human-readable for logs
        and the job_runs.error column when a run is skipped.

        ``last_success_at`` is the timestamp of the previous successful run,
        or ``None`` if there has never been one (which always permits a run).
        ``now`` defaults to the current UTC time.
        """
        # Normalise both sides up front: mixing a naive `now` with the
        # tz-normalised `last_success_at` would raise TypeError on subtraction.
        now = self._as_utc(now)
        if last_success_at is None:
            return True, "no prior successful run"
        last_success_at = self._as_utc(last_success_at)

        min_gap = self.min_gap_hours(now)
        band = self._band(now)
        age_h = (now - last_success_at).total_seconds() / 3600.0

        # A zero gap in the active window means "run on every cron fire",
        # regardless of how recent the last success was.
        if min_gap <= 0 and band == "active":
            return True, "active window"
        if age_h >= min_gap:
            return True, f"{band}: last run {age_h:.2f}h ago (≥ {min_gap:.2f}h)"
        return False, f"{band} throttled — last run {age_h:.2f}h ago (< {min_gap:.2f}h)"
|
|
|
|
|
|
# Policy for the AI jobs: built entirely from the CadencePolicy defaults, so
# there is no gap inside the active window and throttling only applies
# off-hours and on weekends.
DEFAULT_POLICY = CadencePolicy()

# Policy for news ingestion + tagging. The cron fires every 20 minutes and
# this policy decides whether a given fire actually does work:
#   active window -> at most one run per 20 minutes (3 runs/hour)
#   off-hours     -> at most one run per 3 hours
#   weekends      -> at most one run per 6 hours
NEWS_POLICY = CadencePolicy(
    weekend_gap_h=6.0,
    off_hours_gap_h=3.0,
    active_gap_h=1 / 3,  # 20 minutes, expressed in hours
)
|