import html
from html.parser import HTMLParser


class _HTMLStripper(HTMLParser):
    """Collect the text content of an HTML fragment, discarding all tags.

    The parser is created with ``convert_charrefs=True``, so character
    references (``&amp;``, ``&#60;`` ...) arrive in ``handle_data`` already
    decoded.  Callers must not unescape the collected text a second time.
    """

    def __init__(self) -> None:
        # Explicit rather than relying on the default: get_text() depends on
        # the parser decoding character references exactly once.
        super().__init__(convert_charrefs=True)
        self._parts: list[str] = []

    def handle_data(self, data: str) -> None:
        """Record one run of (already unescaped) text."""
        self._parts.append(data)

    def get_text(self) -> str:
        """Return all text seen so far, concatenated in document order."""
        return "".join(self._parts)


def _strip_html(text: str) -> str:
    """Remove HTML tags and unescape entities from Mumble messages.

    Args:
        text: Raw message body, possibly containing HTML markup.

    Returns:
        The plain-text content with tags dropped and character references
        decoded exactly once (``&amp;lt;`` yields the literal ``&lt;``).
    """
    stripper = _HTMLStripper()
    stripper.feed(text)
    # feed() alone holds back trailing text that might be a truncated
    # character reference (e.g. the "&T" in "AT&T"); close() flushes it.
    stripper.close()
    # No html.unescape() here: the parser already decoded the references,
    # and a second pass would turn escaped markup into real markup.
    return stripper.get_text()