# read.markets/app/services/i18n.py

"""Language registry + prompt helpers for localized AI output.

Two surfaces consume this module:
- Per-user LLM call sites (portfolio analysis only at this stage) call
  ``respond_in_clause(user.lang)`` and append the result to their
  system prompt; call sites that need a stronger nudge can also
  prepend ``language_directive_lead(user.lang)``.
- The settings dropdown + its PATCH endpoint consult ``ACTIVE_LANGUAGES``
  to decide which options are selectable. The strategic-log and digest
  translation fan-outs also consult it to decide which languages to
  spend tokens on.

Adding Spanish/French/German support later is a one-line constant
change: extend ``ACTIVE_LANGUAGES`` to include the new code. No other
code change is required — the rest of the system already treats them
as first-class via ``LANGUAGES``.
"""
from __future__ import annotations


# Registry of every language the system knows about: two-letter lowercase
# code -> English display label. The label is what the prompt helpers in
# this module (``respond_in_clause`` / ``language_directive_lead``)
# interpolate into the LLM instructions, so it must read naturally in a
# sentence such as "Respond in <label>.".
# ES/FR/DE are kept here so labels still render in the dropdown (as
# disabled options) without requiring code changes to enable them later.
LANGUAGES: dict[str, str] = {
    "en": "English",
    "it": "Italian",
    "es": "Spanish",
    "fr": "French",
    "de": "German",
}


# Languages users can actually select; every entry is also a key of
# ``LANGUAGES``. Settings POST validates against this; the strategic-log
# + digest translation fan-outs only consider these.
# NOTE(review): the module docstring refers to the settings endpoint as
# PATCH while this comment says POST — confirm which verb is correct.
# The prompt helpers in this module do not consult this set: they accept
# any non-English code present in ``LANGUAGES``.
ACTIVE_LANGUAGES: set[str] = {"en", "it"}


def respond_in_clause(lang: str | None) -> str:
    """Build the tail clause that nudges a per-user LLM prompt to a language.

    Args:
        lang: Language code of the user (a key of ``LANGUAGES``), or
            ``None``/empty when no preference is known.

    Returns:
        Two newlines followed by ``Respond in <Language>.`` where
        ``<Language>`` is the label registered in ``LANGUAGES``. An empty
        string is returned for ``en``, for ``None``/empty input and for
        codes missing from ``LANGUAGES``, so the caller stays on the
        default English path and can append the result unconditionally.
    """
    # Missing preference or English: nothing to add to the prompt.
    if not lang or lang == "en":
        return ""

    # Single lookup; ``None`` means the code is not in the registry.
    label = LANGUAGES.get(lang)
    return f"\n\nRespond in {label}." if label is not None else ""


def language_directive_lead(lang: str | None) -> str:
    """Build a strong language directive meant to open a system prompt.

    Intended for callers that generate user-facing prose in real time
    (portfolio analysis, chat) and need the output to actually land in
    the user's preferred language. A lone tail clause such as the one
    from ``respond_in_clause`` is easy for the model to ignore when the
    rest of the prompt and the user message are entirely in English, so
    this block is designed to be prepended: it states up front that all
    output must be in the target language and lists what stays verbatim
    (ticker symbols, ISO currency codes, numeric values). Used together
    with ``respond_in_clause`` at the tail it gives a
    belt-and-suspenders defence.

    Args:
        lang: Language code of the user (a key of ``LANGUAGES``), or
            ``None``/empty when no preference is known.

    Returns:
        The directive block, terminated by a blank line. An empty string
        is returned for ``en``, ``None``/empty input and codes missing
        from ``LANGUAGES``, so callers can paste it in unconditionally.
    """
    # Missing preference or English: no directive needed.
    if not lang or lang == "en":
        return ""

    target = LANGUAGES.get(lang)
    if target is None:
        # Code not present in the registry: fall back to the default path.
        return ""

    heading = f"# LANGUAGE — write everything in {target}\n"
    rules = (
        "All output — section headers, prose, lists, and any inline "
        f"labels — must be written in {target}. Do NOT mix English in. "
        "Ticker symbols (AAPL, MSFT, VOD.L), ISO currency codes "
        "(USD, EUR, GBP), and numeric values stay unchanged.\n\n"
    )
    return heading + rules