feat: add always-on voice trigger mode with TTS echo

When [voice] trigger is set in config, the bot continuously listens to voice and transcribes it. If a transcription starts with the trigger word (matched case-insensitively), the trigger word is stripped and the remainder is echoed back via TTS. Non-triggered speech is silently discarded unless !listen is also active.
2026-02-22 03:24:03 +01:00
parent 7b9359c152
commit e127f72660
2 changed files with 221 additions and 11 deletions
--- a/plugins/voice.py
+++ b/plugins/voice.py
@@ -47,6 +47,7 @@ def _ps(bot):
    cfg = getattr(bot, "config", {}).get("voice", {})
    return bot._pstate.setdefault("voice", {
        "listen": False,
+        "trigger": cfg.get("trigger", ""),
        "buffers": {},          # {username: bytearray}
        "last_ts": {},          # {username: float monotonic}
        "flush_task": None,
@@ -83,7 +84,7 @@ def _pcm_to_wav(pcm: bytes) -> bytes:
 def _on_voice(bot, user, sound_chunk):
    """Buffer incoming voice PCM per user.  Runs on pymumble thread."""
    ps = _ps(bot)
-    if not ps["listen"]:
+    if not ps["listen"] and not ps["trigger"]:
        return
    name = user["name"] if isinstance(user, dict) else None
    if not name or name == bot.nick:
@@ -133,7 +134,7 @@ async def _flush_monitor(bot):
    ps = _ps(bot)
    loop = asyncio.get_running_loop()
    try:
-        while ps["listen"]:
+        while ps["listen"] or ps["trigger"]:
            await asyncio.sleep(_FLUSH_INTERVAL)
            now = time.monotonic()
            to_flush: list[tuple[str, bytes]] = []
@@ -158,8 +159,20 @@ async def _flush_monitor(bot):
                    continue
                if not text or text.strip("., ") == "":
                    continue
-                log.info("voice: %s said: %s", name, text)
-                await bot.action("0", f"heard {name} say: {text}")
+
+                trigger = ps["trigger"]
+                if trigger and text.lower().startswith(trigger.lower()):
+                    remainder = text[len(trigger):].strip()
+                    if remainder:
+                        log.info("voice: trigger from %s: %s", name, remainder)
+                        bot._spawn(
+                            _tts_play(bot, remainder), name="voice-tts",
+                        )
+                    continue
+
+                if ps["listen"]:
+                    log.info("voice: %s said: %s", name, text)
+                    await bot.action("0", f"heard {name} say: {text}")
    except asyncio.CancelledError:
        pass
    except Exception:
@@ -256,7 +269,11 @@ async def cmd_listen(bot, message):
    parts = message.text.split()
    if len(parts) < 2:
        state = "on" if ps["listen"] else "off"
-        await bot.reply(message, f"Listen: {state}")
+        trigger = ps["trigger"]
+        info = f"Listen: {state}"
+        if trigger:
+            info += f" | Trigger: {trigger}"
+        await bot.reply(message, info)
        return

    sub = parts[1].lower()
@@ -267,10 +284,11 @@ async def cmd_listen(bot, message):
        await bot.reply(message, "Listening for voice")
    elif sub == "off":
        ps["listen"] = False
-        with ps["lock"]:
-            ps["buffers"].clear()
-            ps["last_ts"].clear()
-        _stop_flush_task(bot)
+        if not ps["trigger"]:
+            with ps["lock"]:
+                ps["buffers"].clear()
+                ps["last_ts"].clear()
+            _stop_flush_task(bot)
        await bot.reply(message, "Stopped listening")
    else:
        await bot.reply(message, "Usage: !listen [on|off]")
@@ -300,10 +318,10 @@ async def cmd_say(bot, message):


 async def on_connected(bot) -> None:
-    """Re-register listener after reconnect if listen was on."""
+    """Re-register listener after reconnect if listen or trigger is active."""
    if not _is_mumble(bot):
        return
    ps = _ps(bot)
-    if ps["listen"]:
+    if ps["listen"] or ps["trigger"]:
        _ensure_listener(bot)
        _ensure_flush_task(bot)