From 4adc8dfe8299deca7a149801dfa600135bba2018 Mon Sep 17 00:00:00 2001 From: Giorgio Gilestro Date: Wed, 27 May 2026 21:27:23 +0200 Subject: [PATCH] openrouter: split into llm_prompts (prompt engineering) + transport MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit openrouter.py was 790 lines mixing two orthogonal concerns: - Prompt engineering (build_system_prompt, build_summary_*, build_chat_*, build_daily_digest_*, etc.) — ~400 lines, changes weekly as PROMPT_VERSION bumps - LLM transport (call_llm, _provider_chain, _call_provider, retry + fallback machinery) — ~250 lines, rarely changes Extracted the prompt-engineering surface to app/services/llm_prompts.py. Transport stays in openrouter.py (consistent with the filename — the OpenRouter URL is the transport's anchor). All import sites (jobs, routers, services, tests) split their multi-import lines into two: prompt-things from llm_prompts, transport from openrouter. PROMPT_VERSION constant, _TONE_ALIASES, _resolve_tone, and SYSTEM_PROMPT moved with the prompt functions. No behaviour change — pure relocation. Function signatures, body, and naming all preserved. Co-Authored-By: Claude Opus 4.7 --- app/jobs/ai_log_job.py | 6 +- app/jobs/email_digest_job.py | 4 +- app/jobs/indicator_summary_job.py | 6 +- app/routers/api.py | 4 +- app/services/llm_prompts.py | 597 +++++++++++++++++++++++++++++ app/services/openrouter.py | 589 +--------------------------- app/services/portfolio_analysis.py | 2 +- tests/test_digest_prompts.py | 2 +- tests/test_openrouter_prompt.py | 4 +- 9 files changed, 619 insertions(+), 595 deletions(-) create mode 100644 app/services/llm_prompts.py diff --git a/app/jobs/ai_log_job.py b/app/jobs/ai_log_job.py index 59da09d..2c0277e 100644 --- a/app/jobs/ai_log_job.py +++ b/app/jobs/ai_log_job.py @@ -20,11 +20,13 @@ from app.jobs._market_context import ( from app.models import AICall, JobRun, StrategicLog, StrategicLogTranslation, User from app.services.cadence import DEFAULT_POLICY from app.services.i18n import ACTIVE_LANGUAGES -from app.services.openrouter import ( +from app.services.llm_prompts import ( PROMPT_VERSION, - active_model, build_system_prompt, build_user_prompt, +) +from app.services.openrouter import ( + active_model, call_llm, llm_configured, ) diff --git a/app/jobs/email_digest_job.py b/app/jobs/email_digest_job.py index 5ff25c6..0bad288 100644 --- a/app/jobs/email_digest_job.py +++ b/app/jobs/email_digest_job.py @@ -31,10 +31,12 @@ from app.routers.email import sign_unsubscribe_token from app.services.access import paid_status from app.services.email_service import render_digest_email, send_email from app.services.i18n import ACTIVE_LANGUAGES -from app.services.openrouter import ( +from app.services.llm_prompts import ( PROMPT_VERSION, build_daily_digest_prompt, build_weekly_digest_prompt, +) +from app.services.openrouter import ( call_llm, llm_configured, ) diff --git a/app/jobs/indicator_summary_job.py b/app/jobs/indicator_summary_job.py index 5f47221..fb21f24 100644 --- a/app/jobs/indicator_summary_job.py +++ b/app/jobs/indicator_summary_job.py @@ -22,13 +22,15 @@ from app.models import ( ) from app.services.cadence import DEFAULT_POLICY from app.services.i18n import ACTIVE_LANGUAGES -from app.services.openrouter import ( +from app.services.llm_prompts import ( PROMPT_VERSION, - active_model, build_aggregate_summary_system_prompt, build_aggregate_summary_user_prompt, build_summary_system_prompt, build_summary_user_prompt, +) +from app.services.openrouter import ( + active_model, call_llm, llm_configured, month_start, diff --git a/app/routers/api.py b/app/routers/api.py index 10a9f5a..893d08f 100644 --- a/app/routers/api.py +++ b/app/routers/api.py @@ -25,9 +25,11 @@ from app.services.i18n import ACTIVE_LANGUAGES from app.config import get_settings from app.db import get_session, utcnow from app.jobs._market_context import REFERENCE_LINE -from app.services.openrouter import ( +from app.services.llm_prompts import ( PROMPT_VERSION, build_chat_system_prompt, +) +from app.services.openrouter import ( call_llm, month_start, ) diff --git a/app/services/llm_prompts.py b/app/services/llm_prompts.py new file mode 100644 index 0000000..9840ec2 --- /dev/null +++ b/app/services/llm_prompts.py @@ -0,0 +1,597 @@ +"""Prompt-engineering surface for AI surfaces. + +This module assembles the system + user prompts the LLM ingests. It +has no I/O — pure string-building from typed inputs. Pair with +``app.services.openrouter`` (the transport layer) which actually +calls the model. + +The two halves of LLM work — what to ask vs how to ask — change at +very different cadences. Prompt-version bumps (see PROMPT_VERSION +below) happen ~weekly; transport changes are rare. +""" +from __future__ import annotations + +import json +from datetime import datetime + + +# Bump when the composed prompt changes meaningfully. Stored on every +# StrategicLog row so historical logs can be linked to the prompt that produced +# them. +# +# v6 (2026-05-17): TONE shrinks to NOVICE | INTERMEDIATE (PRO dropped). New +# educational stance baked into _CORE — explicit anti-TA, anti-gambling-mindset +# framing aimed at young investors entering the trading world. NOVICE retuned +# to be pedagogical (defining terms, anti-pattern teach-backs); INTERMEDIATE +# kept terse but with light-touch educational nudges. See tasks/todo.md. +# v7 (2026-05-18): Forbid "(Updated HH:MM UTC)" clauses in the date header — +# the model was hallucinating future times. The user prompt now carries the +# actual current UTC time so the model has accurate temporal context. +# v9 (2026-05-25): Adds daily + weekly digest prompt builders for email. +PROMPT_VERSION = 9 + + +# --- Core: invariant across tone/analysis settings ---------------------------- + +_CORE = """You are Cassandra, writing a single daily strategic markets log \ +for one specific investor. Synthesis, not exposition. + +# Lens +- Geopolitics → markets is the primary causal chain. For each sector move, \ +ask: geopolitical, cyclical, or idiosyncratic. Label it. +- Divergences and contradictions are where the information is. Hunt for them. +- Absence of expected moves is signal. If the thesis predicted a reaction \ +that didn't happen, that's more interesting than the reactions that did. +- Compare live readings against any reference snapshots provided. + +# Multi-source news +- When state-aligned outlets (Xinhua, China Daily, RT) and Western outlets \ +cover the same event, read the gap in framing — that's the data. +- News matters only insofar as it changes a market read. Color without \ +implications is filler. + +# Structure +- One-line date header containing ONLY the date (e.g. `2026-05-18`) and \ +optional anchor framing on the same line (e.g. "Week 11 since Hormuz"). \ +**Never include a time-of-day clause like "(Updated 21:30 UTC)"** — \ +generation time is recorded as metadata elsewhere. Inventing a future or \ +arbitrary time in the header confuses readers. +- Immediately after the date header — with **nothing** in between — write a \ +TL;DR. Format it as: + + ## TL;DR + + One concise paragraph of 2-3 sentences, **≤60 words total**, naming the \ +single most important read or divergence of the day with concrete numbers. \ +This is what a reader who only has 10 seconds sees. Don't waste it on the \ +weather or generic context. + +- Then 4-6 paragraphs, each anchored on a sleeve, sector, or theme. Concrete \ +numbers in every paragraph. No section over ~150 words. +- One paragraph synthesising the news flow into a market read. +- End with a watch list: 3-5 specific items to track in the next week, \ +each one sentence. + +# Time-horizon discipline +- This is a STRATEGIC log, not a day-trader's read. Treat 1-day moves under \ +2% as background noise; mention them only when they break or confirm a \ +multi-week trend or are extreme outliers. +- Anchor every claim to multi-week (1m), multi-month (since-anchor), or \ +multi-year (1y) changes — not 1d. If the only thing happening is a 1d move, \ +omit the paragraph. +- The watch list is for "structural tripwires over the next 1-3 months", not \ +"things to watch tomorrow". Each watch item should name a level/threshold \ +whose breach would change the regime, not a calendar-date event. + +# Rational vs irrational framing (MANDATORY in every paragraph) +The reader's primary goal is to disconnect rational decisions from market \ +irrationality. This is the single most important lens of the log — it MUST \ +appear in every sector or theme paragraph, not just where it feels natural. \ +For each paragraph, before writing it, ask yourself the two questions and \ +then make both answers visible in the prose: +- The RATIONAL drivers — what the underlying factors justify: earnings, \ +real-economy data, monetary policy, structural geopolitical shifts, \ +valuation vs fundamentals. +- The IRRATIONAL drivers — what the crowd is doing regardless of fundamentals: \ +positioning, narrative momentum, sentiment extremes, concentration, \ +flow-driven moves, options gamma, credit complacency. +Then state the GAP: is price moving with the rational read, ahead of it, \ +or against it? If they agree, say so briefly and move on. If they diverge \ +— price moving on irrational drivers while fundamentals say otherwise, or \ +vice versa — name the divergence explicitly. Those gaps are where the next \ +regime change starts and are the whole point of this log. +A paragraph that names only price action or only fundamentals, without \ +both lenses, is incomplete and must be rewritten. + +# Discipline +- No emojis, no marketing language, no "concerning" or "unprecedented" \ +without a specific number behind it. +- Concrete > vague. "AMD +113% since the anchor" beats "AI stocks up sharply". +- Distinguish "the thesis predicted X and X happened" from "the thesis \ +predicted X and X did not happen". Both are useful; conflating them is not. +- Don't repeat the same point in different words across paragraphs. +- No buy/sell recommendations. Triggers are pre-set elsewhere; your job is \ +to report whether reality is confirming, modifying, or refuting the thesis. + +# Stance (educational, anti-TA, anti-gambling) +The target reader is most likely young, new to investing, and at risk of \ +treating markets like a horse race they need to "read" via chart patterns. \ +Cassandra is the corrective. +- **No technical analysis.** Head-and-shoulders, RSI thresholds, Fibonacci \ +levels, Elliott waves, "support/resistance" — these are descriptions of past \ +crowd behaviour, not predictions. Don't use them; don't legitimise them. If \ +you mention a price level, frame it as a positioning fact (e.g. "the level \ +where the latest tranche of buyers entered"), not a signal. +- **No gambling framing.** Markets are not a coin flip and not a horse race. \ +Never present a position as a single decisive moment, a "now or never", or a \ +bet to be won. Every read should follow the shape: *regime → implication → \ +what would change the regime*. +- **Macro causality, every time.** Price moves get explained through \ +fundamentals, geopolitics, monetary policy, and structural shifts — not \ +chart shapes. Even short paragraphs need the cause, not just the effect. + +# System temperature (closing line, mandatory) +Close the log with a single sentence on a line of its own, formatted exactly: + + System temperature: [cool|neutral|elevated|hot|extreme] — [one clause naming the 2-3 specific divergences or readings that justify the label] + +This is the line a reader who only sees the watch list scrolls down to. Make \ +it earn its place: cite real signals (HY OAS, breadth, VIX, valuation, real \ +yields), not vibes. + +# Update mode (when an earlier log from today is provided) +If the user message includes a section labelled "Earlier log from today \ +(generated HH:MM UTC)", treat that as YOUR OWN earlier draft. You are \ +UPDATING it for the current data, not starting from scratch. +- Don't restate context that hasn't changed. Anchor on what's moved SINCE \ +that timestamp: confirmations, refutations, new emergent patterns. +- The TL;DR should lead with the move since the earlier read when there \ +was a meaningful intra-day change ("Since this morning's read, …") — \ +otherwise stay regime-level. +- The watch list should evolve: drop items that triggered or settled, add \ +items that emerged. Keep items still load-bearing. +- Preserve any insights from the earlier draft that remain valid; sharpen \ +or revise the ones that don't. Avoid contradicting yourself silently — if \ +you change a stance, name it briefly ("Earlier I read X; with Y now, the \ +read shifts to Z").""" + + +# --- Tone: audience-shaping block -------------------------------------------- + +_TONE: dict[str, str] = { + "NOVICE": """# Audience: novice — likely a young investor new to markets +This reader probably arrived from social media, treats charts as predictions, \ +and is one bad week away from quitting. Your job is to **educate them out of \ +the gambling mindset** without ever being preachy. Calm, patient, slightly \ +teacherly. Never condescending. + +- **Define jargon the first time it appears.** A short clause in parentheses \ +is fine: "yield curve (the chart of borrowing costs across different \ +maturities)", "ERP (equity risk premium — the extra return investors demand \ +for owning stocks instead of safe bonds)", "basis point (one hundredth of a \ +percent — 25bp = 0.25%)". +- **Avoid ticker shorthand without context.** Use "Apple (AAPL)" on first \ +mention, then "Apple" or the ticker after. +- **Everyday phrasing over jargon** where the meaning survives: "the price \ +of US government debt fell, pushing yields up" rather than "the long end \ +backed up"; "investors are paying more for the same earnings" rather than \ +"multiple expansion". +- **One analogy per concept, used sparingly.** Use them to bridge to \ +something concrete the reader already understands — not to entertain. + +# Educational teach-backs (NOVICE-specific, when warranted) +When the day's data makes a common misconception concrete, drop in ONE \ +teach-back of one to two sentences. Don't force it. Don't moralise. Examples \ +of moments to do this: + +- Anyone treating chart patterns as predictions: \ +"Patterns like head-and-shoulders describe what crowds did, not what they \ +will do — they're stories told after the fact, not edges." +- Anyone fixated on day-to-day moves: \ +"A 1% one-day move in a stock is roughly what you'd expect by chance. The \ +multi-week trend is where the information lives." +- Anyone treating one ticker as a coin flip: \ +"A single name's monthly move is mostly noise. The regime — what bonds, the \ +dollar, and credit are doing together — tells you whether ANY stock is \ +likely to drift up or down." +- Anyone trying to "time the bottom" or "buy the dip": \ +"Catching the bottom is a different game from owning the next cycle. The \ +first needs you to be right within days; the second needs you to be roughly \ +right within years." + +Limit yourself to one teach-back per log. Skip them entirely if the day's \ +data doesn't naturally invite one. + +# Length +Target ~700 words. Slightly more than INTERMEDIATE because explanations \ +need breathing room.""", + + "INTERMEDIATE": """# Audience: intermediate — reads the news, learning to \ +connect macro to markets +Assume the reader knows market basics (yield curves, breakevens, HY OAS, \ +sector ETFs, the difference between cyclical and defensive, what a basis \ +point is). Use common terms without defining them, but stay clear of deep \ +institutional shorthand ("the belly", "duration trade", "carry pickup", \ +"the RV book", "off-the-run"). + +Light-touch educational nudges are welcome when the day's data warrants — \ +e.g. "with rates this volatile, technical levels in equities are mostly \ +distraction" — but keep them to a passing clause, not a paragraph. Don't \ +moralise. + +# Length +Target ~600 words. Lean and clear, no padding.""", +} + + +# Legacy values map to the closest current value. Logs a warning so we can +# notice if some caller's config didn't get updated. +_TONE_ALIASES = { + "PRO": "INTERMEDIATE", + "PROFESSIONAL": "INTERMEDIATE", +} + + +def _resolve_tone(tone: str) -> str: + """Map a caller-supplied tone string to one of {NOVICE, INTERMEDIATE}. + + Unknown tones fall back to INTERMEDIATE. The legacy PRO value is mapped + to INTERMEDIATE (audience pivot, see PROMPT_VERSION v6 notes).""" + upper = (tone or "").upper().strip() + if upper in _TONE: + return upper + if upper in _TONE_ALIASES: + return _TONE_ALIASES[upper] + return "INTERMEDIATE" + + +# --- Analysis: forward-vs-backward focus ------------------------------------- + +_ANALYSIS: dict[str, str] = { + "DRY": """# Analysis style: dry +Report what happened. Identify divergences and contradictions. Compare to \ +references. Do not speculate on what comes next. Forward-looking statements \ +are limited to "what would invalidate the read" — never "we expect X to \ +happen". The watch list contains items to monitor, not predictions.""", + + "SPECULATIVE": """# Analysis style: speculative +Report what happened, then explicitly explore forward scenarios. For each \ +significant sector or theme, sketch a 1-4 week scenario set: the base case \ +(what the data suggests), a contrarian case (what would invalidate it), and \ +what tape signal would tip you from one to the other. Be explicit about \ +uncertainty — say "the base case is" not "X will happen". The watch list is \ +the trip-wires that decide between scenarios.""", +} + + +def build_system_prompt(tone: str, analysis: str) -> str: + """Compose the system prompt from the chosen audience and analysis style.""" + tone_block = _TONE[_resolve_tone(tone)] + analysis_block = _ANALYSIS.get(analysis.upper(), _ANALYSIS["SPECULATIVE"]) + return "\n\n".join([_CORE, tone_block, analysis_block]) + + +# Backwards-compat: a default-composed SYSTEM_PROMPT for tests / callers that +# don't yet pass tone/analysis. New callers should call build_system_prompt(). +SYSTEM_PROMPT = build_system_prompt("INTERMEDIATE", "SPECULATIVE") + + +# --- Chat-mode overrides (sidebar on /log) ----------------------------------- + +_CHAT_OVERRIDES = """# Chat mode (overrides the log-structure rules above) +You are NOT writing a daily log right now. The user is asking a specific +question via the chat sidebar. +- Forget the date header, TL;DR, sectional structure, and watch list. Just answer. +- Typical response: 200-400 words. Longer only if the question genuinely + warrants it. +- Cite specific numbers and named headlines from the reference materials + below whenever relevant. If a number isn't in the context, don't invent it. +- If a question is outside the provided context (e.g. asking about a stock or + event not in the data), say so plainly rather than speculating from prior + knowledge. +- No buy/sell recommendations. If asked, redirect to thesis and scenarios. +- Keep the same audience and analysis discipline established above.""" + + +def build_summary_system_prompt(tone: str, analysis: str) -> str: + """A lean, focused system prompt for the per-indicator-group hourly + summary. INTERPRETATION not description — the reader has the table + next to this paragraph; they don't need numbers recited at them.""" + tone_block = _TONE[_resolve_tone(tone)] + analysis_block = _ANALYSIS.get(analysis.upper(), _ANALYSIS["SPECULATIVE"]) + return f"""You write a TINY interpretation (≤60 words, 2-3 sentences) \ +of ONE indicator group for a strategic markets dashboard. + +# What this is for +The reader is looking at the table of numbers right next to your text. \ +They can see the values. They CANNOT see the meaning. Your job is to \ +**explain what the data means**, not to recite it. Each sentence should be \ +a regime-level interpretation, a fundamental driver identification, or a \ +cross-indicator implication — not a description of moves. + +# Rational vs irrational lens (required at this length too) +Even at 2-3 sentences, contrast what the underlying factors justify \ +(rational: fundamentals, policy, valuation) with what the crowd is doing \ +(irrational: positioning, narrative, flows) whenever the two diverge. If \ +they don't diverge, say so in one clause. Never just describe the move \ +without placing it on this axis. + +# Hard constraints +- Plain prose, ONE paragraph. No markdown, no headers, no lists, no labels. +- Open IMMEDIATELY with substance. NEVER start with: "I need to", "I'll", \ +"We need to", "We are asked", "Here's", "Let me", "Let's", "Sure", "Looking \ +at", "Based on", "Summary:", "The data shows", "First", "To address". No \ +meta-commentary at all. +- Cite at most 2-3 specific numbers and ONLY when they anchor an \ +interpretation. Don't list moves; explain them. +- Multi-week / multi-month horizon. 1-day moves under 2% are noise — skip. +- No buy/sell language. No predictions. No watch list. No TL;DR. No date \ +header. No "system temperature" line — that belongs to the full daily log. +- Output the read directly. Do NOT include phrases like "Example", "Good \ +example", "Bad example", "Reference", or any meta-framing of your output. + +{tone_block} + +{analysis_block} +""" + + +def build_summary_user_prompt(group_name: str, quotes: list[dict]) -> str: + parts = [ + f"# Group: {group_name}", + "Indicators (latest reading + 1d/1m/1y/since-anchor change):", + "```json", + json.dumps(quotes, indent=2, default=str)[:12000], + "```", + "\nWrite the 2-3 sentence read for this group now.", + ] + return "\n".join(parts) + + +def build_aggregate_summary_system_prompt(tone: str, analysis: str) -> str: + """System prompt for the cross-group aggregate read shown on the dashboard. + Wider lens than a per-group summary — synthesise across all groups.""" + tone_block = _TONE[_resolve_tone(tone)] + analysis_block = _ANALYSIS.get(analysis.upper(), _ANALYSIS["SPECULATIVE"]) + return f"""You write a single SHORT cross-asset INTERPRETATION (≤80 \ +words, 2-4 sentences) for the dashboard header. The reader is glancing — \ +give them the meaning of the whole tape, not a recap. + +# What this is for +The reader can see every indicator on the dashboard below this paragraph. \ +Your job is NOT to summarise the moves. It is to explain what the moves, \ +**taken together as a system**, mean: which regime is being signalled, \ +which divergences are load-bearing, what fundamental story the cross-asset \ +behaviour tells. + +# Rational vs irrational lens (required at this length too) +The cross-asset tape's value is in the gap between what the underlying \ +factors justify (rational: fundamentals, policy, valuation) and what the \ +crowd is actually doing (irrational: positioning, narrative momentum, \ +flows). At least one of the 2-4 sentences must name this gap or, if the \ +two cohere, explicitly say so. + +# Hard constraints +- Plain prose, ONE paragraph. No markdown, headers, lists, or labels. +- Open IMMEDIATELY with substance. NEVER start with: "I need to", "I'll", \ +"We need to", "Here's", "Let me", "Looking at", "Based on", "Sure", "Summary:", \ +"The data shows", "Across the board". No meta-commentary. +- Identify the single most important **cross-asset implication**: e.g. \ +"rates and credit disagree", "equities outrun fundamentals", "geopolitical \ +risk premium is in commodities but not vol". Cite no more than 3 specific \ +numbers, and only as anchors for the interpretation. +- Multi-week / multi-month horizon. 1-day moves under 2% are noise. +- No buy/sell language. No predictions of specific levels. +- Output the read directly. Do NOT include phrases like "Example", "Good \ +example", "Bad example", "Reference", or any meta-framing of your output. + +{tone_block} + +{analysis_block} +""" + + +def build_aggregate_summary_user_prompt(quotes_by_group: dict[str, list[dict]]) -> str: + parts = [ + "# All indicator groups (latest readings + change windows)", + "```json", + json.dumps(quotes_by_group, indent=2, default=str)[:20000], + "```", + "\nWrite the cross-asset aggregate read now.", + ] + return "\n".join(parts) + + +def build_chat_system_prompt( + tone: str, + analysis: str, + *, + log_content: str | None, + log_generated_at: datetime | None, + quotes_by_group: dict[str, list[dict]], + headlines: list[dict], + reference_line: str | None = None, +) -> str: + """Composed system prompt for the /log chat sidebar. Carries the user's + chosen tone + analysis style and inlines the latest log + market data + + headlines as reference material the model can cite from.""" + parts = [build_system_prompt(tone, analysis), "", _CHAT_OVERRIDES, ""] + if reference_line: + parts.append(f"# Doc reference snapshot\n{reference_line}\n") + if log_content: + ts = log_generated_at.strftime("%Y-%m-%d %H:%M UTC") if log_generated_at else "n/a" + parts.append(f"# Latest strategic log (generated {ts})\n\n{log_content}\n") + parts.append("# Live market data") + parts.append( + "```json\n" + json.dumps(quotes_by_group, indent=2, default=str)[:25000] + "\n```" + ) + parts.append("# Recent headlines (last 24h, thesis-filtered top 50)") + for h in headlines[:50]: + parts.append(f"- [{h['source']}] {h['title']}") + return "\n".join(parts) + + +def build_user_prompt( + *, + today: datetime, + anchor: str | None, + quotes_by_group: dict[str, list[dict]], + headlines_by_bucket: dict[str, list[dict]], + reference_line: str | None = None, + previous_log: object | None = None, +) -> str: + """Assemble the user message from already-fetched-and-persisted data. + If `previous_log` is a StrategicLog from earlier today, it's included + as 'Update mode' context — the model will revise rather than restart.""" + parts = [ + f"# Strategic log request — {today.strftime('%Y-%m-%d')}", + # Explicit current time so the model doesn't hallucinate one. The + # date header it writes MUST stay date-only (per system prompt). + f"Current time: {today.strftime('%Y-%m-%d %H:%M UTC')}", + ] + if anchor: + parts.append(f"Anchor reference date: {anchor}") + if reference_line: + parts.append( + "\n## Reference snapshot (when the macro thesis was authored)" + f"\n{reference_line}\nCompare live readings against it." + ) + + if previous_log is not None: + gen = getattr(previous_log, "generated_at", None) + ts = gen.strftime("%H:%M UTC") if gen else "earlier today" + parts.append( + f"\n## Earlier log from today (generated {ts})\n" + "Treat this as YOUR OWN earlier draft for today. Update it for\n" + "the current data — don't restate unchanged context. See the\n" + "'Update mode' section of the system prompt for how to handle it.\n" + "```markdown\n" + f"{previous_log.content}\n" + "```" + ) + + parts.append("\n## Live market data (per group)") + parts.append("```json\n" + json.dumps(quotes_by_group, indent=2, default=str) + "\n```") + parts.append("\n## News flow (last 24h, filtered by bucket)") + for label, items in headlines_by_bucket.items(): + if not items: + continue + parts.append(f"\n### {label.upper()}") + for h in items[:30]: + parts.append(f"- [{h['when'][:16].replace('T',' ')}] [{h['source']}] {h['title']}") + + task_line = ( + "\n## Task\nWrite the daily strategic log in ~800 words, following " + "the discipline in the system prompt. No preamble; begin directly " + "with the date header." + ) + if previous_log is not None: + task_line = ( + "\n## Task\nUpdate the earlier log above for the current data. " + "Keep the same structure (date header, TL;DR, sections, watch " + "list, system temperature) but anchor on what has CHANGED since " + "the earlier draft's timestamp. ~800 words. No preamble." + ) + parts.append(task_line) + return "\n".join(parts) + + +def _digest_tone_clause(tone: str) -> str: + if tone.upper() == "NOVICE": + return "Use plain English. Define any jargon on first use." + return "Write for a reader who already speaks markets fluently." + + +def build_daily_digest_prompt( + *, + tone: str, + today, + quotes_by_group: dict, + headlines_by_bucket: dict, + reference_line: str, +) -> tuple[str, str]: + """System + user prompt for the once-a-day editorial digest. + + Different from the hourly log: the daily digest reflects on the past + 24h and looks forward to the upcoming session. Longer, less + 'live-blogging,' more contextual. Target ~600 words.""" + system = ( + "You write the daily editorial digest for Read the Markets. " + f"Audience tone: {tone.upper()}. {_digest_tone_clause(tone)} " + "Cover: (1) what mattered yesterday, (2) what to watch in today's " + "EU and US sessions, (3) one cross-asset thread connecting them. " + "No predictions of price level, no buy/sell language. Target ~600 " + "words. Output HTML using only

,

,
    ,
  • , , " + " — no , , or wrapper, no inline styles." + ) + user = _digest_user_prompt( + today=today, quotes_by_group=quotes_by_group, + headlines_by_bucket=headlines_by_bucket, reference_line=reference_line, + ) + return system, user + + +def build_weekly_digest_prompt( + *, + tone: str, + today, + quotes_by_group: dict, + headlines_by_bucket: dict, + reference_line: str, +) -> tuple[str, str]: + """System + user prompt for the Sunday weekly recap + look-ahead. + + Sent to ALL opt-in users (free and paid). Target ~900 words.""" + system = ( + "You write the Sunday weekly digest for Read the Markets. " + f"Audience tone: {tone.upper()}. {_digest_tone_clause(tone)} " + "Cover: (1) the week behind — what moved and why, " + "(2) the week ahead — releases, earnings, central-bank meetings, " + "(3) the cross-asset story to keep in mind. " + "No predictions of price level, no buy/sell language. Target ~900 " + "words. Output HTML using only

    ,

    ,
      ,
    • , , " + " — no , , or wrapper, no inline styles." + ) + user = _digest_user_prompt( + today=today, quotes_by_group=quotes_by_group, + headlines_by_bucket=headlines_by_bucket, reference_line=reference_line, + ) + return system, user + + +def _digest_user_prompt( + *, + today, + quotes_by_group: dict, + headlines_by_bucket: dict, + reference_line: str, +) -> str: + """Shared user-message body used by both digest prompts. Same data + shape as the hourly user prompt; reformatted for the digest context.""" + today_str = today.strftime("%A %d %B %Y") if hasattr(today, "strftime") else str(today) + lines = [f"TODAY (UTC): {today_str}", "", f"REFERENCE: {reference_line}", ""] + + if headlines_by_bucket: + lines.append("HEADLINES BY CATEGORY") + for cat, items in headlines_by_bucket.items(): + lines.append(f" [{cat}]") + for h in items[:30]: + when = h.get("when", "") + src = h.get("source", "") + title = h.get("title", "") + lines.append(f" {when} · {src} · {title}") + lines.append("") + + if quotes_by_group: + lines.append("LATEST QUOTES BY GROUP") + for grp, items in quotes_by_group.items(): + lines.append(f" [{grp}]") + for q in items[:30]: + sym = q.get("symbol", "") + price = q.get("price", "") + lbl = q.get("label", "") + ccy = q.get("currency", "") + lines.append(f" {sym} ({lbl}) — {price} {ccy}") + lines.append("") + + return "\n".join(lines) diff --git a/app/services/openrouter.py b/app/services/openrouter.py index ff3215e..c1ddb4f 100644 --- a/app/services/openrouter.py +++ b/app/services/openrouter.py @@ -1,8 +1,8 @@ -"""Strategic-log generator — DB-fed, OpenRouter-backed. +"""LLM transport layer — OpenRouter / DeepSeek API calls. -Ported from /home/gg/ownCloud/Family/Finances/Wealth/strategic_log.py. The -system prompt is preserved verbatim (the voice we converged on). The user -prompt is now built from DB rows, not from subprocess JSON dumps. +Handles provider selection, retry + fallback machinery, and the monthly +budget-cap helpers. Prompt engineering lives in ``app.services.llm_prompts``; +this module only cares about *how* to reach the model, not *what to ask*. """ from __future__ import annotations @@ -18,420 +18,6 @@ from app.config import get_settings OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions" -# Bump when the composed prompt changes meaningfully. Stored on every -# StrategicLog row so historical logs can be linked to the prompt that produced -# them. -# -# v6 (2026-05-17): TONE shrinks to NOVICE | INTERMEDIATE (PRO dropped). New -# educational stance baked into _CORE — explicit anti-TA, anti-gambling-mindset -# framing aimed at young investors entering the trading world. NOVICE retuned -# to be pedagogical (defining terms, anti-pattern teach-backs); INTERMEDIATE -# kept terse but with light-touch educational nudges. See tasks/todo.md. -# v7 (2026-05-18): Forbid "(Updated HH:MM UTC)" clauses in the date header — -# the model was hallucinating future times. The user prompt now carries the -# actual current UTC time so the model has accurate temporal context. -# v9 (2026-05-25): Adds daily + weekly digest prompt builders for email. -PROMPT_VERSION = 9 - - -# --- Core: invariant across tone/analysis settings ---------------------------- - -_CORE = """You are Cassandra, writing a single daily strategic markets log \ -for one specific investor. Synthesis, not exposition. - -# Lens -- Geopolitics → markets is the primary causal chain. For each sector move, \ -ask: geopolitical, cyclical, or idiosyncratic. Label it. -- Divergences and contradictions are where the information is. Hunt for them. -- Absence of expected moves is signal. If the thesis predicted a reaction \ -that didn't happen, that's more interesting than the reactions that did. -- Compare live readings against any reference snapshots provided. - -# Multi-source news -- When state-aligned outlets (Xinhua, China Daily, RT) and Western outlets \ -cover the same event, read the gap in framing — that's the data. -- News matters only insofar as it changes a market read. Color without \ -implications is filler. - -# Structure -- One-line date header containing ONLY the date (e.g. `2026-05-18`) and \ -optional anchor framing on the same line (e.g. "Week 11 since Hormuz"). \ -**Never include a time-of-day clause like "(Updated 21:30 UTC)"** — \ -generation time is recorded as metadata elsewhere. Inventing a future or \ -arbitrary time in the header confuses readers. -- Immediately after the date header — with **nothing** in between — write a \ -TL;DR. Format it as: - - ## TL;DR - - One concise paragraph of 2-3 sentences, **≤60 words total**, naming the \ -single most important read or divergence of the day with concrete numbers. \ -This is what a reader who only has 10 seconds sees. Don't waste it on the \ -weather or generic context. - -- Then 4-6 paragraphs, each anchored on a sleeve, sector, or theme. Concrete \ -numbers in every paragraph. No section over ~150 words. -- One paragraph synthesising the news flow into a market read. -- End with a watch list: 3-5 specific items to track in the next week, \ -each one sentence. - -# Time-horizon discipline -- This is a STRATEGIC log, not a day-trader's read. Treat 1-day moves under \ -2% as background noise; mention them only when they break or confirm a \ -multi-week trend or are extreme outliers. -- Anchor every claim to multi-week (1m), multi-month (since-anchor), or \ -multi-year (1y) changes — not 1d. If the only thing happening is a 1d move, \ -omit the paragraph. -- The watch list is for "structural tripwires over the next 1-3 months", not \ -"things to watch tomorrow". Each watch item should name a level/threshold \ -whose breach would change the regime, not a calendar-date event. - -# Rational vs irrational framing (MANDATORY in every paragraph) -The reader's primary goal is to disconnect rational decisions from market \ -irrationality. This is the single most important lens of the log — it MUST \ -appear in every sector or theme paragraph, not just where it feels natural. \ -For each paragraph, before writing it, ask yourself the two questions and \ -then make both answers visible in the prose: -- The RATIONAL drivers — what the underlying factors justify: earnings, \ -real-economy data, monetary policy, structural geopolitical shifts, \ -valuation vs fundamentals. -- The IRRATIONAL drivers — what the crowd is doing regardless of fundamentals: \ -positioning, narrative momentum, sentiment extremes, concentration, \ -flow-driven moves, options gamma, credit complacency. -Then state the GAP: is price moving with the rational read, ahead of it, \ -or against it? If they agree, say so briefly and move on. If they diverge \ -— price moving on irrational drivers while fundamentals say otherwise, or \ -vice versa — name the divergence explicitly. Those gaps are where the next \ -regime change starts and are the whole point of this log. -A paragraph that names only price action or only fundamentals, without \ -both lenses, is incomplete and must be rewritten. - -# Discipline -- No emojis, no marketing language, no "concerning" or "unprecedented" \ -without a specific number behind it. -- Concrete > vague. "AMD +113% since the anchor" beats "AI stocks up sharply". -- Distinguish "the thesis predicted X and X happened" from "the thesis \ -predicted X and X did not happen". Both are useful; conflating them is not. -- Don't repeat the same point in different words across paragraphs. -- No buy/sell recommendations. Triggers are pre-set elsewhere; your job is \ -to report whether reality is confirming, modifying, or refuting the thesis. - -# Stance (educational, anti-TA, anti-gambling) -The target reader is most likely young, new to investing, and at risk of \ -treating markets like a horse race they need to "read" via chart patterns. \ -Cassandra is the corrective. -- **No technical analysis.** Head-and-shoulders, RSI thresholds, Fibonacci \ -levels, Elliott waves, "support/resistance" — these are descriptions of past \ -crowd behaviour, not predictions. Don't use them; don't legitimise them. If \ -you mention a price level, frame it as a positioning fact (e.g. "the level \ -where the latest tranche of buyers entered"), not a signal. -- **No gambling framing.** Markets are not a coin flip and not a horse race. \ -Never present a position as a single decisive moment, a "now or never", or a \ -bet to be won. Every read should follow the shape: *regime → implication → \ -what would change the regime*. -- **Macro causality, every time.** Price moves get explained through \ -fundamentals, geopolitics, monetary policy, and structural shifts — not \ -chart shapes. Even short paragraphs need the cause, not just the effect. - -# System temperature (closing line, mandatory) -Close the log with a single sentence on a line of its own, formatted exactly: - - System temperature: [cool|neutral|elevated|hot|extreme] — [one clause naming the 2-3 specific divergences or readings that justify the label] - -This is the line a reader who only sees the watch list scrolls down to. Make \ -it earn its place: cite real signals (HY OAS, breadth, VIX, valuation, real \ -yields), not vibes. - -# Update mode (when an earlier log from today is provided) -If the user message includes a section labelled "Earlier log from today \ -(generated HH:MM UTC)", treat that as YOUR OWN earlier draft. You are \ -UPDATING it for the current data, not starting from scratch. -- Don't restate context that hasn't changed. Anchor on what's moved SINCE \ -that timestamp: confirmations, refutations, new emergent patterns. -- The TL;DR should lead with the move since the earlier read when there \ -was a meaningful intra-day change ("Since this morning's read, …") — \ -otherwise stay regime-level. -- The watch list should evolve: drop items that triggered or settled, add \ -items that emerged. Keep items still load-bearing. -- Preserve any insights from the earlier draft that remain valid; sharpen \ -or revise the ones that don't. Avoid contradicting yourself silently — if \ -you change a stance, name it briefly ("Earlier I read X; with Y now, the \ -read shifts to Z").""" - - -# --- Tone: audience-shaping block -------------------------------------------- - -_TONE: dict[str, str] = { - "NOVICE": """# Audience: novice — likely a young investor new to markets -This reader probably arrived from social media, treats charts as predictions, \ -and is one bad week away from quitting. Your job is to **educate them out of \ -the gambling mindset** without ever being preachy. Calm, patient, slightly \ -teacherly. Never condescending. - -- **Define jargon the first time it appears.** A short clause in parentheses \ -is fine: "yield curve (the chart of borrowing costs across different \ -maturities)", "ERP (equity risk premium — the extra return investors demand \ -for owning stocks instead of safe bonds)", "basis point (one hundredth of a \ -percent — 25bp = 0.25%)". -- **Avoid ticker shorthand without context.** Use "Apple (AAPL)" on first \ -mention, then "Apple" or the ticker after. -- **Everyday phrasing over jargon** where the meaning survives: "the price \ -of US government debt fell, pushing yields up" rather than "the long end \ -backed up"; "investors are paying more for the same earnings" rather than \ -"multiple expansion". -- **One analogy per concept, used sparingly.** Use them to bridge to \ -something concrete the reader already understands — not to entertain. - -# Educational teach-backs (NOVICE-specific, when warranted) -When the day's data makes a common misconception concrete, drop in ONE \ -teach-back of one to two sentences. Don't force it. Don't moralise. Examples \ -of moments to do this: - -- Anyone treating chart patterns as predictions: \ -"Patterns like head-and-shoulders describe what crowds did, not what they \ -will do — they're stories told after the fact, not edges." -- Anyone fixated on day-to-day moves: \ -"A 1% one-day move in a stock is roughly what you'd expect by chance. The \ -multi-week trend is where the information lives." -- Anyone treating one ticker as a coin flip: \ -"A single name's monthly move is mostly noise. The regime — what bonds, the \ -dollar, and credit are doing together — tells you whether ANY stock is \ -likely to drift up or down." -- Anyone trying to "time the bottom" or "buy the dip": \ -"Catching the bottom is a different game from owning the next cycle. The \ -first needs you to be right within days; the second needs you to be roughly \ -right within years." - -Limit yourself to one teach-back per log. Skip them entirely if the day's \ -data doesn't naturally invite one. - -# Length -Target ~700 words. Slightly more than INTERMEDIATE because explanations \ -need breathing room.""", - - "INTERMEDIATE": """# Audience: intermediate — reads the news, learning to \ -connect macro to markets -Assume the reader knows market basics (yield curves, breakevens, HY OAS, \ -sector ETFs, the difference between cyclical and defensive, what a basis \ -point is). Use common terms without defining them, but stay clear of deep \ -institutional shorthand ("the belly", "duration trade", "carry pickup", \ -"the RV book", "off-the-run"). - -Light-touch educational nudges are welcome when the day's data warrants — \ -e.g. "with rates this volatile, technical levels in equities are mostly \ -distraction" — but keep them to a passing clause, not a paragraph. Don't \ -moralise. - -# Length -Target ~600 words. Lean and clear, no padding.""", -} - - -# Legacy values map to the closest current value. Logs a warning so we can -# notice if some caller's config didn't get updated. -_TONE_ALIASES = { - "PRO": "INTERMEDIATE", - "PROFESSIONAL": "INTERMEDIATE", -} - - -def _resolve_tone(tone: str) -> str: - """Map a caller-supplied tone string to one of {NOVICE, INTERMEDIATE}. - - Unknown tones fall back to INTERMEDIATE. The legacy PRO value is mapped - to INTERMEDIATE (audience pivot, see PROMPT_VERSION v6 notes).""" - upper = (tone or "").upper().strip() - if upper in _TONE: - return upper - if upper in _TONE_ALIASES: - return _TONE_ALIASES[upper] - return "INTERMEDIATE" - - -# --- Analysis: forward-vs-backward focus ------------------------------------- - -_ANALYSIS: dict[str, str] = { - "DRY": """# Analysis style: dry -Report what happened. Identify divergences and contradictions. Compare to \ -references. Do not speculate on what comes next. Forward-looking statements \ -are limited to "what would invalidate the read" — never "we expect X to \ -happen". The watch list contains items to monitor, not predictions.""", - - "SPECULATIVE": """# Analysis style: speculative -Report what happened, then explicitly explore forward scenarios. For each \ -significant sector or theme, sketch a 1-4 week scenario set: the base case \ -(what the data suggests), a contrarian case (what would invalidate it), and \ -what tape signal would tip you from one to the other. Be explicit about \ -uncertainty — say "the base case is" not "X will happen". The watch list is \ -the trip-wires that decide between scenarios.""", -} - - -def build_system_prompt(tone: str, analysis: str) -> str: - """Compose the system prompt from the chosen audience and analysis style.""" - tone_block = _TONE[_resolve_tone(tone)] - analysis_block = _ANALYSIS.get(analysis.upper(), _ANALYSIS["SPECULATIVE"]) - return "\n\n".join([_CORE, tone_block, analysis_block]) - - -# Backwards-compat: a default-composed SYSTEM_PROMPT for tests / callers that -# don't yet pass tone/analysis. New callers should call build_system_prompt(). -SYSTEM_PROMPT = build_system_prompt("INTERMEDIATE", "SPECULATIVE") - - -# --- Chat-mode overrides (sidebar on /log) ----------------------------------- - -_CHAT_OVERRIDES = """# Chat mode (overrides the log-structure rules above) -You are NOT writing a daily log right now. The user is asking a specific -question via the chat sidebar. -- Forget the date header, TL;DR, sectional structure, and watch list. Just answer. -- Typical response: 200-400 words. Longer only if the question genuinely - warrants it. -- Cite specific numbers and named headlines from the reference materials - below whenever relevant. If a number isn't in the context, don't invent it. -- If a question is outside the provided context (e.g. asking about a stock or - event not in the data), say so plainly rather than speculating from prior - knowledge. -- No buy/sell recommendations. If asked, redirect to thesis and scenarios. -- Keep the same audience and analysis discipline established above.""" - - -def build_summary_system_prompt(tone: str, analysis: str) -> str: - """A lean, focused system prompt for the per-indicator-group hourly - summary. INTERPRETATION not description — the reader has the table - next to this paragraph; they don't need numbers recited at them.""" - tone_block = _TONE[_resolve_tone(tone)] - analysis_block = _ANALYSIS.get(analysis.upper(), _ANALYSIS["SPECULATIVE"]) - return f"""You write a TINY interpretation (≤60 words, 2-3 sentences) \ -of ONE indicator group for a strategic markets dashboard. - -# What this is for -The reader is looking at the table of numbers right next to your text. \ -They can see the values. They CANNOT see the meaning. Your job is to \ -**explain what the data means**, not to recite it. Each sentence should be \ -a regime-level interpretation, a fundamental driver identification, or a \ -cross-indicator implication — not a description of moves. - -# Rational vs irrational lens (required at this length too) -Even at 2-3 sentences, contrast what the underlying factors justify \ -(rational: fundamentals, policy, valuation) with what the crowd is doing \ -(irrational: positioning, narrative, flows) whenever the two diverge. If \ -they don't diverge, say so in one clause. Never just describe the move \ -without placing it on this axis. - -# Hard constraints -- Plain prose, ONE paragraph. No markdown, no headers, no lists, no labels. -- Open IMMEDIATELY with substance. NEVER start with: "I need to", "I'll", \ -"We need to", "We are asked", "Here's", "Let me", "Let's", "Sure", "Looking \ -at", "Based on", "Summary:", "The data shows", "First", "To address". No \ -meta-commentary at all. -- Cite at most 2-3 specific numbers and ONLY when they anchor an \ -interpretation. Don't list moves; explain them. -- Multi-week / multi-month horizon. 1-day moves under 2% are noise — skip. -- No buy/sell language. No predictions. No watch list. No TL;DR. No date \ -header. No "system temperature" line — that belongs to the full daily log. -- Output the read directly. Do NOT include phrases like "Example", "Good \ -example", "Bad example", "Reference", or any meta-framing of your output. - -{tone_block} - -{analysis_block} -""" - - -def build_summary_user_prompt(group_name: str, quotes: list[dict]) -> str: - parts = [ - f"# Group: {group_name}", - "Indicators (latest reading + 1d/1m/1y/since-anchor change):", - "```json", - json.dumps(quotes, indent=2, default=str)[:12000], - "```", - "\nWrite the 2-3 sentence read for this group now.", - ] - return "\n".join(parts) - - -def build_aggregate_summary_system_prompt(tone: str, analysis: str) -> str: - """System prompt for the cross-group aggregate read shown on the dashboard. - Wider lens than a per-group summary — synthesise across all groups.""" - tone_block = _TONE[_resolve_tone(tone)] - analysis_block = _ANALYSIS.get(analysis.upper(), _ANALYSIS["SPECULATIVE"]) - return f"""You write a single SHORT cross-asset INTERPRETATION (≤80 \ -words, 2-4 sentences) for the dashboard header. The reader is glancing — \ -give them the meaning of the whole tape, not a recap. - -# What this is for -The reader can see every indicator on the dashboard below this paragraph. \ -Your job is NOT to summarise the moves. It is to explain what the moves, \ -**taken together as a system**, mean: which regime is being signalled, \ -which divergences are load-bearing, what fundamental story the cross-asset \ -behaviour tells. - -# Rational vs irrational lens (required at this length too) -The cross-asset tape's value is in the gap between what the underlying \ -factors justify (rational: fundamentals, policy, valuation) and what the \ -crowd is actually doing (irrational: positioning, narrative momentum, \ -flows). At least one of the 2-4 sentences must name this gap or, if the \ -two cohere, explicitly say so. - -# Hard constraints -- Plain prose, ONE paragraph. No markdown, headers, lists, or labels. -- Open IMMEDIATELY with substance. NEVER start with: "I need to", "I'll", \ -"We need to", "Here's", "Let me", "Looking at", "Based on", "Sure", "Summary:", \ -"The data shows", "Across the board". No meta-commentary. -- Identify the single most important **cross-asset implication**: e.g. \ -"rates and credit disagree", "equities outrun fundamentals", "geopolitical \ -risk premium is in commodities but not vol". Cite no more than 3 specific \ -numbers, and only as anchors for the interpretation. -- Multi-week / multi-month horizon. 1-day moves under 2% are noise. -- No buy/sell language. No predictions of specific levels. -- Output the read directly. Do NOT include phrases like "Example", "Good \ -example", "Bad example", "Reference", or any meta-framing of your output. - -{tone_block} - -{analysis_block} -""" - - -def build_aggregate_summary_user_prompt(quotes_by_group: dict[str, list[dict]]) -> str: - parts = [ - "# All indicator groups (latest readings + change windows)", - "```json", - json.dumps(quotes_by_group, indent=2, default=str)[:20000], - "```", - "\nWrite the cross-asset aggregate read now.", - ] - return "\n".join(parts) - - -def build_chat_system_prompt( - tone: str, - analysis: str, - *, - log_content: str | None, - log_generated_at: datetime | None, - quotes_by_group: dict[str, list[dict]], - headlines: list[dict], - reference_line: str | None = None, -) -> str: - """Composed system prompt for the /log chat sidebar. Carries the user's - chosen tone + analysis style and inlines the latest log + market data + - headlines as reference material the model can cite from.""" - parts = [build_system_prompt(tone, analysis), "", _CHAT_OVERRIDES, ""] - if reference_line: - parts.append(f"# Doc reference snapshot\n{reference_line}\n") - if log_content: - ts = log_generated_at.strftime("%Y-%m-%d %H:%M UTC") if log_generated_at else "n/a" - parts.append(f"# Latest strategic log (generated {ts})\n\n{log_content}\n") - parts.append("# Live market data") - parts.append( - "```json\n" + json.dumps(quotes_by_group, indent=2, default=str)[:25000] + "\n```" - ) - parts.append("# Recent headlines (last 24h, thesis-filtered top 50)") - for h in headlines[:50]: - parts.append(f"- [{h['source']}] {h['title']}") - return "\n".join(parts) @dataclass @@ -443,172 +29,6 @@ class LogResult: cost_usd: float | None -def build_user_prompt( - *, - today: datetime, - anchor: str | None, - quotes_by_group: dict[str, list[dict]], - headlines_by_bucket: dict[str, list[dict]], - reference_line: str | None = None, - previous_log: object | None = None, -) -> str: - """Assemble the user message from already-fetched-and-persisted data. - If `previous_log` is a StrategicLog from earlier today, it's included - as 'Update mode' context — the model will revise rather than restart.""" - parts = [ - f"# Strategic log request — {today.strftime('%Y-%m-%d')}", - # Explicit current time so the model doesn't hallucinate one. The - # date header it writes MUST stay date-only (per system prompt). - f"Current time: {today.strftime('%Y-%m-%d %H:%M UTC')}", - ] - if anchor: - parts.append(f"Anchor reference date: {anchor}") - if reference_line: - parts.append( - "\n## Reference snapshot (when the macro thesis was authored)" - f"\n{reference_line}\nCompare live readings against it." - ) - - if previous_log is not None: - gen = getattr(previous_log, "generated_at", None) - ts = gen.strftime("%H:%M UTC") if gen else "earlier today" - parts.append( - f"\n## Earlier log from today (generated {ts})\n" - "Treat this as YOUR OWN earlier draft for today. Update it for\n" - "the current data — don't restate unchanged context. See the\n" - "'Update mode' section of the system prompt for how to handle it.\n" - "```markdown\n" - f"{previous_log.content}\n" - "```" - ) - - parts.append("\n## Live market data (per group)") - parts.append("```json\n" + json.dumps(quotes_by_group, indent=2, default=str) + "\n```") - parts.append("\n## News flow (last 24h, filtered by bucket)") - for label, items in headlines_by_bucket.items(): - if not items: - continue - parts.append(f"\n### {label.upper()}") - for h in items[:30]: - parts.append(f"- [{h['when'][:16].replace('T',' ')}] [{h['source']}] {h['title']}") - - task_line = ( - "\n## Task\nWrite the daily strategic log in ~800 words, following " - "the discipline in the system prompt. No preamble; begin directly " - "with the date header." - ) - if previous_log is not None: - task_line = ( - "\n## Task\nUpdate the earlier log above for the current data. " - "Keep the same structure (date header, TL;DR, sections, watch " - "list, system temperature) but anchor on what has CHANGED since " - "the earlier draft's timestamp. ~800 words. No preamble." - ) - parts.append(task_line) - return "\n".join(parts) - - -def _digest_tone_clause(tone: str) -> str: - if tone.upper() == "NOVICE": - return "Use plain English. Define any jargon on first use." - return "Write for a reader who already speaks markets fluently." - - -def build_daily_digest_prompt( - *, - tone: str, - today, - quotes_by_group: dict, - headlines_by_bucket: dict, - reference_line: str, -) -> tuple[str, str]: - """System + user prompt for the once-a-day editorial digest. - - Different from the hourly log: the daily digest reflects on the past - 24h and looks forward to the upcoming session. Longer, less - 'live-blogging,' more contextual. Target ~600 words.""" - system = ( - "You write the daily editorial digest for Read the Markets. " - f"Audience tone: {tone.upper()}. {_digest_tone_clause(tone)} " - "Cover: (1) what mattered yesterday, (2) what to watch in today's " - "EU and US sessions, (3) one cross-asset thread connecting them. " - "No predictions of price level, no buy/sell language. Target ~600 " - "words. Output HTML using only

      ,

      ,
        ,
      • , , " - " — no , , or wrapper, no inline styles." - ) - user = _digest_user_prompt( - today=today, quotes_by_group=quotes_by_group, - headlines_by_bucket=headlines_by_bucket, reference_line=reference_line, - ) - return system, user - - -def build_weekly_digest_prompt( - *, - tone: str, - today, - quotes_by_group: dict, - headlines_by_bucket: dict, - reference_line: str, -) -> tuple[str, str]: - """System + user prompt for the Sunday weekly recap + look-ahead. - - Sent to ALL opt-in users (free and paid). Target ~900 words.""" - system = ( - "You write the Sunday weekly digest for Read the Markets. " - f"Audience tone: {tone.upper()}. {_digest_tone_clause(tone)} " - "Cover: (1) the week behind — what moved and why, " - "(2) the week ahead — releases, earnings, central-bank meetings, " - "(3) the cross-asset story to keep in mind. " - "No predictions of price level, no buy/sell language. Target ~900 " - "words. Output HTML using only

        ,

        ,
          ,
        • , , " - " — no , , or wrapper, no inline styles." - ) - user = _digest_user_prompt( - today=today, quotes_by_group=quotes_by_group, - headlines_by_bucket=headlines_by_bucket, reference_line=reference_line, - ) - return system, user - - -def _digest_user_prompt( - *, - today, - quotes_by_group: dict, - headlines_by_bucket: dict, - reference_line: str, -) -> str: - """Shared user-message body used by both digest prompts. Same data - shape as the hourly user prompt; reformatted for the digest context.""" - today_str = today.strftime("%A %d %B %Y") if hasattr(today, "strftime") else str(today) - lines = [f"TODAY (UTC): {today_str}", "", f"REFERENCE: {reference_line}", ""] - - if headlines_by_bucket: - lines.append("HEADLINES BY CATEGORY") - for cat, items in headlines_by_bucket.items(): - lines.append(f" [{cat}]") - for h in items[:30]: - when = h.get("when", "") - src = h.get("source", "") - title = h.get("title", "") - lines.append(f" {when} · {src} · {title}") - lines.append("") - - if quotes_by_group: - lines.append("LATEST QUOTES BY GROUP") - for grp, items in quotes_by_group.items(): - lines.append(f" [{grp}]") - for q in items[:30]: - sym = q.get("symbol", "") - price = q.get("price", "") - lbl = q.get("label", "") - ccy = q.get("currency", "") - lines.append(f" {sym} ({lbl}) — {price} {ccy}") - lines.append("") - - return "\n".join(lines) - - def _provider_chain() -> list[str]: """Ordered list of providers to try: primary, then fallback (unless the fallback is unset, the same as primary, or has no API key).""" @@ -775,7 +195,6 @@ async def call_llm( raise last_exc - def month_window() -> tuple[datetime, datetime]: """[start, now] in UTC for the current calendar month.""" now = datetime.now(timezone.utc) diff --git a/app/services/portfolio_analysis.py b/app/services/portfolio_analysis.py index 0aef3cd..450f948 100644 --- a/app/services/portfolio_analysis.py +++ b/app/services/portfolio_analysis.py @@ -32,10 +32,10 @@ from app.db import utcnow from app.logging import get_logger from app.models import AICall from app.services.i18n import LANGUAGES, respond_in_clause +from app.services.llm_prompts import build_system_prompt from app.services.openrouter import ( LogResult, active_model, - build_system_prompt, call_llm, ) diff --git a/tests/test_digest_prompts.py b/tests/test_digest_prompts.py index 97a9755..8b01629 100644 --- a/tests/test_digest_prompts.py +++ b/tests/test_digest_prompts.py @@ -3,7 +3,7 @@ from __future__ import annotations from datetime import datetime, timezone -from app.services.openrouter import ( +from app.services.llm_prompts import ( build_daily_digest_prompt, build_weekly_digest_prompt, ) diff --git a/tests/test_openrouter_prompt.py b/tests/test_openrouter_prompt.py index 51f52a1..f21edc2 100644 --- a/tests/test_openrouter_prompt.py +++ b/tests/test_openrouter_prompt.py @@ -9,7 +9,7 @@ pytest.importorskip("pydantic_settings") from datetime import datetime, timezone -from app.services.openrouter import SYSTEM_PROMPT, build_user_prompt +from app.services.llm_prompts import SYSTEM_PROMPT, build_user_prompt def test_system_prompt_has_voice_anchors(): @@ -35,7 +35,7 @@ def test_pro_tone_falls_back_to_intermediate(): """PRO was removed in PROMPT_VERSION 6 (audience pivot to young investors). Legacy callers that still pass PRO should get the INTERMEDIATE prompt rather than a KeyError.""" - from app.services.openrouter import build_system_prompt + from app.services.llm_prompts import build_system_prompt pro = build_system_prompt("PRO", "SPECULATIVE") inter = build_system_prompt("INTERMEDIATE", "SPECULATIVE") assert pro == inter