"""Novice-mode glossary: terms commonly used in macro market commentary, each paired with a plain-language definition. Applied via `wrap_glossary(html, tone)` in the AI-content rendering path on the API side. Only NOVICE-tone responses get the wrapping; INTERMEDIATE users see plain text. The wrap markup is: VIX `title` gives a native fallback on touch devices that don't fire :hover. The CSS tooltip (see `.glossary:hover::after` in cassandra.css) uses `data-def` for richer formatting. Wrapping happens at most once per term per HTML fragment — repeated occurrences stay plain. """ from __future__ import annotations import html as _html import re from dataclasses import dataclass @dataclass(frozen=True) class Term: """One glossary entry. `aliases`: alternate forms that should also match (case-insensitive unless the term is acronym-style, see `case_sensitive`). `case_sensitive`: when True, the regex preserves capitalisation — used for acronyms like VIX, ERP, DXY where lowercase matches would catch common words. """ label: str definition: str aliases: tuple[str, ...] = () case_sensitive: bool = False # Curated for macro reads aimed at young investors. Keep definitions # under ~30 words each — they have to fit in a tooltip. TERMS: tuple[Term, ...] = ( Term( "VIX", "The CBOE Volatility Index. Tracks the market's expected 30-day " "volatility of the S&P 500 — often called the 'fear gauge'. High " "VIX = traders pricing in big moves; low VIX = calm complacency.", case_sensitive=True, ), Term( "yield curve", "A chart of US (or any government's) borrowing costs across " "maturities — 2-year, 5-year, 10-year, etc. Its shape signals " "what markets expect from growth and interest rates.", ), Term( "inverted yield curve", "When short-term yields exceed long-term yields. Historically one " "of the most reliable recession warning signals — it means " "markets expect rates to be cut in the future.", ), Term( "basis point", "One hundredth of a percent. 100bp = 1%. 
Markets quote rate " "changes in basis points so '25bp hike' = a 0.25% rate increase.", aliases=("basis points", "bp", "bps", "bps."), ), Term( "ERP", "Equity risk premium — the extra return investors demand for " "owning stocks instead of risk-free Treasuries. Low ERP = stocks " "look expensive vs. bonds; high ERP = the opposite.", aliases=("equity risk premium",), case_sensitive=True, ), Term( "HY OAS", "High-yield option-adjusted spread — the extra yield junk bonds " "pay over Treasuries. Rising HY OAS = credit markets worried; " "falling = complacency. A key risk gauge.", aliases=("high-yield OAS", "high yield OAS", "high-yield spread", "credit spread"), case_sensitive=True, ), Term( "CPI", "Consumer Price Index — the headline inflation measure. Tracks " "the average price change of a basket of goods households buy. " "Released monthly; markets watch it for Fed-rate implications.", case_sensitive=True, ), Term( "breakeven", "Inflation breakeven — the difference between a regular Treasury " "yield and an inflation-protected one. Markets' implied inflation " "expectation for that horizon. Watched as a forward inflation read.", aliases=("breakevens", "inflation breakeven"), ), Term( "duration", "How sensitive a bond's price is to rate changes. A 10-year " "duration means roughly a 10% price drop for every 1% rate " "rise. Long-duration assets get hurt most by rate hikes.", ), Term( "Fed", "The US Federal Reserve — the central bank that sets US interest " "rates and provides dollar liquidity. Its rate decisions ripple " "through every asset class globally.", aliases=("Federal Reserve",), case_sensitive=True, ), Term( "FOMC", "Federal Open Market Committee — the Fed's rate-setting body. " "Meets ~8 times a year; its statements and the chair's press " "conference move markets reliably.", case_sensitive=True, ), Term( "ECB", "European Central Bank — the euro area's Fed-equivalent. 
Sets " "rates for 20 countries; its decisions matter for EUR, bunds, " "and European banks.", case_sensitive=True, ), Term( "BOJ", "Bank of Japan — Japan's central bank, the last major holdout of " "near-zero rates. Its policy shifts move USD/JPY, global " "carry trades, and long-end yields worldwide.", case_sensitive=True, ), Term( "DXY", "The Dollar Index — the USD's value against a basket of major " "currencies (mostly EUR, JPY, GBP). Rising DXY squeezes dollar-" "denominated debt and pressures commodities.", aliases=("dollar index",), case_sensitive=True, ), Term( "Brent", "The international benchmark for crude oil, priced from " "North Sea fields. Sets the price most of the world's oil " "tracks. Compare to WTI (the US benchmark).", case_sensitive=True, ), Term( "WTI", "West Texas Intermediate — the US crude oil benchmark. Priced " "out of Cushing, Oklahoma. Usually trades a few dollars below " "Brent because of where it's delivered.", case_sensitive=True, ), Term( "soft landing", "The Fed's hoped-for outcome: cooling inflation without triggering " "a recession. Historically rare — most rate-hike cycles end in " "downturn, not gentle deceleration.", ), Term( "hard landing", "Cooling inflation only because the economy tipped into recession. " "The opposite of a soft landing — rate hikes work, but at the " "cost of jobs and growth.", ), Term( "Magnificent 7", "Apple, Microsoft, Alphabet, Amazon, Nvidia, Meta, and Tesla — the " "seven US megacaps driving most of the S&P 500's gains since 2023. " "Concentration risk: when they wobble, the index does too.", aliases=("Mag 7", "Mag-7", "Magnificent Seven"), ), Term( "Treasury", "US government debt. 'Treasuries' covers everything from 4-week " "T-bills to 30-year bonds. 
Considered the world's safest asset; " "their yields are the baseline for almost everything else.", aliases=("Treasuries", "US Treasury", "US Treasuries"), case_sensitive=True, ), Term( "regime", "The broad market environment — what's driving prices right now. " "Examples: 'risk-on regime' (stocks and credit bid), 'rates-driven " "regime' (yields lead everything). Knowing the regime tells you " "which signals matter.", ), Term( "safe haven", "An asset investors flock to when scared — gold, the US dollar, " "Treasuries, sometimes the Swiss franc and yen. Their behaviour " "in a crisis tells you which fear is dominant.", ), Term( "Strait of Hormuz", "A narrow waterway between Iran and Oman that ~20% of the " "world's seaborne oil passes through. Tensions there spike " "oil prices instantly — it's the single most-watched geopolitical " "chokepoint for energy.", aliases=("Hormuz",), ), Term( "quantitative easing", "When a central bank prints new money and uses it to buy bonds " "in the open market. Pushes asset prices up, yields down. The " "post-2008 and 2020 playbook.", aliases=("QE",), ), Term( "quantitative tightening", "The reverse of QE — the central bank lets bonds it owns mature " "without replacing them, shrinking its balance sheet. Drains " "liquidity from markets.", aliases=("QT",), ), Term( "OAS", "Option-adjusted spread — the extra yield a corporate bond pays " "above a Treasury of similar maturity, after accounting for any " "embedded options. Widening OAS = market pricing more credit risk.", aliases=("option-adjusted spread",), case_sensitive=True, ), Term( "ATH", "All-time high — the highest level a price or index has ever " "reached. Often shorthand: 'S&P at ATH' = S&P 500 making new " "record highs.", case_sensitive=True, ), Term( "YoY", "Year-over-year — comparing a value to the same value 12 months " "earlier. 
'CPI +3.8% YoY' = consumer prices are 3.8% higher than " "they were a year ago.", aliases=("year-over-year", "year over year"), case_sensitive=True, ), Term( "MoM", "Month-over-month — comparing a value to the previous month. " "Useful for spotting recent shifts, but noisier than YoY since " "one month is a small sample.", aliases=("month-over-month", "month over month"), case_sensitive=True, ), Term( "GDP", "Gross domestic product — the total value of goods and services " "an economy produces. The headline measure of economic size and " "growth. Markets care most about its rate of change.", case_sensitive=True, ), Term( "PMI", "Purchasing Managers' Index — a monthly survey of business " "activity. Reading above 50 = expansion; below 50 = contraction. " "Leading indicator for the broader economy.", case_sensitive=True, ), Term( "HY", "High yield — corporate bonds rated below investment grade ('junk " "bonds'). Pay more interest because there's more risk of default. " "Their behaviour signals how worried credit markets are.", aliases=("high yield", "high-yield"), case_sensitive=True, ), Term( "IG", "Investment grade — corporate bonds rated BBB- or higher by S&P. " "Considered low default risk. The bulk of the corporate bond " "market by value sits here.", aliases=("investment grade", "investment-grade"), case_sensitive=True, ), Term( "EM", "Emerging markets — economies still industrialising (China, India, " "Brazil, Mexico, Turkey, etc.). Higher growth potential but more " "volatile and currency-exposed than developed-market peers.", aliases=("emerging markets",), case_sensitive=True, ), Term( "DM", "Developed markets — mature economies with deep capital markets " "(US, UK, Eurozone, Japan, Australia). Slower growth but more " "stable than EM. The benchmark for global allocation.", aliases=("developed markets",), case_sensitive=True, ), Term( "rally", "A sustained move higher in a price or index. Distinct from a " "one-day bounce: implies multi-session momentum. 
The opposite of " "a sell-off or drawdown.", aliases=("rallies",), ), Term( "sell-off", "A sustained move lower across a market segment. Usually triggered " "by a shift in macro expectations (rate scare, growth scare, " "geopolitical risk) rather than single-stock news.", aliases=("selloff", "sell off"), ), Term( "drawdown", "How far a price has fallen from its recent peak. A 20% drawdown " "= a 20% drop from the high. The conventional threshold for a " "'bear market'.", ), Term( "positioning", "How much of a given asset investors collectively hold (or are " "short). Crowded long positioning leaves no buyers left when " "sentiment turns — that's when sell-offs accelerate.", ), ) def _build_pattern(term: Term) -> re.Pattern: """Compile a word-boundary regex for the term + its aliases.""" flags = 0 if term.case_sensitive else re.IGNORECASE forms = sorted([term.label, *term.aliases], key=len, reverse=True) escaped = "|".join(re.escape(f) for f in forms) return re.compile(rf"(? # breaks code samples, inside doubles up tooltips with the link, and # inside
# <pre> can break the formatting.
# Matches one whole protected element: opening tag, content, and the
# matching closing tag. Group 1 captures the tag name and the `\1`
# back-reference pins the closing tag to it (case-insensitively, because
# of re.IGNORECASE). The closing-tag anchor is essential: a pattern that
# ends in a bare lazy `.*?` matches the empty string there, so it would
# cover only the opening tag and leave the element's content unprotected.
# re.DOTALL lets the content span multiple lines.
_PROTECTED_BLOCK_RE = re.compile(
    r"<(code|pre|a|script|style)\b[^>]*>.*?</\1\s*>",
    re.IGNORECASE | re.DOTALL,
)

# Splitter for tag-aware substitution. Each match is either one complete
# HTML tag (opening, closing or self-closing) or one character reference
# (named such as `&amp;`, or numeric such as `&#39;`). Whatever lies
# between two consecutive matches is plain text — the only place term
# substitution is allowed to run. Matching inside attribute values would
# corrupt spans produced by earlier wraps.
_TAG_OR_ENTITY_RE = re.compile(
    r"<[^>]+>"             # a tag: everything from "<" to the next ">"
    r"|&[#a-zA-Z0-9]+;"    # or a named / numeric entity
)


def _make_span(term: Term, matched_text: str) -> str:
    """Return `matched_text` wrapped in the glossary tooltip `<span>`.

    The span carries `class="glossary"`, the canonical term label in
    `data-term`, and the plain-language definition in `data-def`.

    Args:
        term: Glossary entry supplying `label` and `definition`.
        matched_text: The exact text that matched in the document. It is
            inserted verbatim so the visible wording and capitalisation
            are unchanged; it comes from an HTML text segment, so it is
            already valid HTML content.

    Returns:
        The HTML fragment for the wrapped term.
    """
    # No `title=` attribute: it would render a *second* native tooltip
    # alongside the JS-driven one. Mobile users get a tap-to-toggle path
    # from the JS handler in base.html.
    #
    # Labels and definitions contain quotes, apostrophes and ampersands
    # (e.g. "S&P 500", "'fear gauge'"), so both are escaped for use inside
    # a double-quoted attribute value.
    label_attr = _html.escape(term.label, quote=True)
    definition_attr = _html.escape(term.definition, quote=True)
    return (
        f'<span class="glossary" data-term="{label_attr}" '
        f'data-def="{definition_attr}">{matched_text}</span>'
    )


def _wrap_first_match_in_text_segments(html: str, term: Term, pattern: re.Pattern) -> tuple[str, bool]:
    """Wrap the first occurrence of `pattern` found in plain text.

    The input is scanned as alternating tag/entity tokens and the text
    between them. Only the text stretches are searched, so attribute
    values (including those written by earlier wraps) can never match.

    Returns a `(new_html, wrapped)` pair. When nothing matches, `new_html`
    is the input unchanged and `wrapped` is False.
    """
    # Spans of every tag/entity token, followed by a zero-width sentinel
    # at the end of the string so the trailing text after the final token
    # is handled by the same loop as every other text segment.
    token_spans = [tok.span() for tok in _TAG_OR_ENTITY_RE.finditer(html)]
    token_spans.append((len(html), len(html)))

    cursor = 0
    for tok_start, tok_end in token_spans:
        # Search a slice rather than passing pos/endpos, so anchors and
        # lookarounds in `pattern` see the text segment in isolation.
        segment = html[cursor:tok_start]
        hit = pattern.search(segment) if segment else None
        if hit is not None:
            hit_start = cursor + hit.start()
            hit_end = cursor + hit.end()
            replaced = html[:hit_start] + _make_span(term, hit.group(0)) + html[hit_end:]
            return replaced, True
        cursor = tok_end

    return html, False


def wrap_glossary(html: str, *, tone: str | None = None) -> str:
    """Wrap the first occurrence of each glossary term in the HTML with a
    `` so the frontend can render a tooltip.

    No-op unless `tone == "NOVICE"`. Wrapping is also a no-op if `html` is
    empty or None.

    Wrapping is **tag-aware**: each term is matched only against text
    that lies outside HTML tags. After wrapping a term, the new
    `` becomes part of the HTML; the next term's pass re-walks the
    tag/text segments, so it never matches inside the newly-added
    attribute values (e.g. the `HY` inside `data-term="HY OAS"`).
    Content inside , 
, ,