doubles up tooltips with the link, and
# inside can break the formatting.
# Matches a whole protected element, from its opening tag to the first
# matching closing tag. The closing tag is spelled `</\1>` (backreference to
# the opening tag name) so the block cannot end early at an unrelated
# "name>" sequence inside it, e.g. the `a>` at the end of a nested `<data>`.
# DOTALL lets the body span newlines; IGNORECASE accepts `<PRE>` / `<A>`.
_PROTECTED_BLOCK_RE = re.compile(
    r"<(code|pre|a|script|style)\b[^>]*>.*?</\1>",
    re.IGNORECASE | re.DOTALL,
)
# Match a single HTML tag (open / close / self-closing) or a named/numeric
# entity. Used to split HTML into alternating "tag" and "text" segments so
# the term substitution only ever runs on text — never inside attribute
# values, where a stray match would corrupt previously-wrapped spans.
#
# Two alternatives:
#   <[^>]+>            "<", one or more non-">" characters, then ">"
#   &[#a-zA-Z0-9]+;    "&", one or more of "#" / ASCII letters / digits, ";"
#
# The tag alternative is purely lexical: a bare "<" in text is treated as
# the start of a tag if any ">" follows it later in the string.
_TAG_OR_ENTITY_RE = re.compile(r"<[^>]+>|&[#a-zA-Z0-9]+;")
def _make_span(term: Term, matched_text: str) -> str:
    """Build the replacement markup for one matched occurrence of a term.

    `matched_text` is the exact text the pattern matched; it is interpolated
    into the result unchanged.
    """
    # NOTE(review): as visible here, the returned f-string contains only
    # `{matched_text}` and never reads `term`, yet the comment below talks
    # about element attributes. Confirm the literal is intact.
    #
    # No `title=` attribute: it would render a *second* native tooltip
    # alongside the JS-driven one. Mobile users get a tap-to-toggle path
    # from the JS handler in base.html.
    return (
        f'{matched_text}'
    )
def _wrap_first_match_in_text_segments(
    html: str, term: Term, pattern: re.Pattern
) -> tuple[str, bool]:
    """Wrap the first match of `pattern` that lies outside any HTML tag.

    `html` is walked as alternating tag/entity and text segments (split by
    `_TAG_OR_ENTITY_RE`). Only the text segments are searched, so attribute
    values — including those added by earlier wraps — are never candidates.

    Args:
        html: The HTML fragment to scan.
        term: The glossary term, forwarded to `_make_span`.
        pattern: Compiled regex locating the term in plain text.

    Returns:
        `(new_html, wrapped)`. When nothing matched, `new_html` equals `html`
        and `wrapped` is False. At most one occurrence is ever wrapped.
    """

    def wrap_in(seg_start: int, seg_end: int) -> str | None:
        """Return `html` with the first match in the text span wrapped, or None."""
        if seg_start >= seg_end:
            # Empty text segments (e.g. between adjacent tags) are not searched.
            return None
        # Search a slice rather than passing pos/endpos, so anchors and
        # lookbehinds in `pattern` treat the segment edges as string edges.
        found = pattern.search(html[seg_start:seg_end])
        if found is None:
            return None
        left = seg_start + found.start()
        right = seg_start + found.end()
        # Everything outside the match is copied verbatim.
        return html[:left] + _make_span(term, found.group(0)) + html[right:]

    cursor = 0
    for boundary in _TAG_OR_ENTITY_RE.finditer(html):
        result = wrap_in(cursor, boundary.start())
        if result is not None:
            # Only the first match is wrapped; no need to visit later tags.
            return result, True
        cursor = boundary.end()

    # Trailing text after the final tag / entity (or the whole string when
    # there are no tags at all).
    result = wrap_in(cursor, len(html))
    if result is not None:
        return result, True
    return html, False
def wrap_glossary(html: str, *, tone: str | None = None) -> str:
"""Wrap the first occurrence of each glossary term in the HTML with a
`` so the frontend can render a tooltip.
No-op unless `tone == "NOVICE"`. Wrapping is also a no-op if `html` is
empty or None.
Wrapping is **tag-aware**: each term is matched only against text
that lies outside HTML tags. After wrapping a term, the new
`` becomes part of the HTML; the next term's pass re-walks the
tag/text segments, so it never matches inside the newly-added
attribute values (e.g. the `HY` inside `data-term="HY OAS"`).
Content inside , , ,