feat: add always-on voice trigger mode with TTS echo

When `trigger` is set in the [voice] config section, the bot continuously listens to and transcribes voice. If a transcription starts with the trigger word (matched case-insensitively), the trigger word is stripped and the remainder is echoed back via TTS. Non-triggered speech is silently discarded unless !listen is also active.
2026-02-22 03:24:03 +01:00
parent 7b9359c152
commit e127f72660
2 changed files with 221 additions and 11 deletions
--- a/plugins/voice.py
+++ b/plugins/voice.py
@@ -47,6 +47,7 @@ def _ps(bot):
    cfg = getattr(bot, "config", {}).get("voice", {})
    return bot._pstate.setdefault("voice", {
        "listen": False,
+        "trigger": cfg.get("trigger", ""),
        "buffers": {},          # {username: bytearray}
        "last_ts": {},          # {username: float monotonic}
        "flush_task": None,
@@ -83,7 +84,7 @@ def _pcm_to_wav(pcm: bytes) -> bytes:
 def _on_voice(bot, user, sound_chunk):
    """Buffer incoming voice PCM per user.  Runs on pymumble thread."""
    ps = _ps(bot)
-    if not ps["listen"]:
+    if not ps["listen"] and not ps["trigger"]:
        return
    name = user["name"] if isinstance(user, dict) else None
    if not name or name == bot.nick:
@@ -133,7 +134,7 @@ async def _flush_monitor(bot):
    ps = _ps(bot)
    loop = asyncio.get_running_loop()
    try:
-        while ps["listen"]:
+        while ps["listen"] or ps["trigger"]:
            await asyncio.sleep(_FLUSH_INTERVAL)
            now = time.monotonic()
            to_flush: list[tuple[str, bytes]] = []
@@ -158,8 +159,20 @@ async def _flush_monitor(bot):
                    continue
                if not text or text.strip("., ") == "":
                    continue
-                log.info("voice: %s said: %s", name, text)
-                await bot.action("0", f"heard {name} say: {text}")
+
+                trigger = ps["trigger"]
+                if trigger and text.lower().startswith(trigger.lower()):
+                    remainder = text[len(trigger):].strip()
+                    if remainder:
+                        log.info("voice: trigger from %s: %s", name, remainder)
+                        bot._spawn(
+                            _tts_play(bot, remainder), name="voice-tts",
+                        )
+                    continue
+
+                if ps["listen"]:
+                    log.info("voice: %s said: %s", name, text)
+                    await bot.action("0", f"heard {name} say: {text}")
    except asyncio.CancelledError:
        pass
    except Exception:
@@ -256,7 +269,11 @@ async def cmd_listen(bot, message):
    parts = message.text.split()
    if len(parts) < 2:
        state = "on" if ps["listen"] else "off"
-        await bot.reply(message, f"Listen: {state}")
+        trigger = ps["trigger"]
+        info = f"Listen: {state}"
+        if trigger:
+            info += f" | Trigger: {trigger}"
+        await bot.reply(message, info)
        return

    sub = parts[1].lower()
@@ -267,10 +284,11 @@ async def cmd_listen(bot, message):
        await bot.reply(message, "Listening for voice")
    elif sub == "off":
        ps["listen"] = False
-        with ps["lock"]:
-            ps["buffers"].clear()
-            ps["last_ts"].clear()
-        _stop_flush_task(bot)
+        if not ps["trigger"]:
+            with ps["lock"]:
+                ps["buffers"].clear()
+                ps["last_ts"].clear()
+            _stop_flush_task(bot)
        await bot.reply(message, "Stopped listening")
    else:
        await bot.reply(message, "Usage: !listen [on|off]")
@@ -300,10 +318,10 @@ async def cmd_say(bot, message):


 async def on_connected(bot) -> None:
-    """Re-register listener after reconnect if listen was on."""
+    """Re-register listener after reconnect if listen or trigger is active."""
    if not _is_mumble(bot):
        return
    ps = _ps(bot)
-    if ps["listen"]:
+    if ps["listen"] or ps["trigger"]:
        _ensure_listener(bot)
        _ensure_flush_task(bot)
--- a/tests/test_voice.py
+++ b/tests/test_voice.py
@@ -532,3 +532,195 @@ class TestOnConnected:
        bot = _FakeBot(mumble=False)
        asyncio.run(_mod.on_connected(bot))
        # Should not raise or register anything
+
+
+# ---------------------------------------------------------------------------
+# TestTriggerMode
+# ---------------------------------------------------------------------------
+
+
+class TestTriggerMode:
+    def test_trigger_config(self):
+        """_ps() reads trigger from config."""
+        bot = _FakeBot()
+        bot.config = {"voice": {"trigger": "claude"}}
+        ps = _mod._ps(bot)
+        assert ps["trigger"] == "claude"
+
+    def test_trigger_default_empty(self):
+        """trigger defaults to empty string (disabled)."""
+        bot = _FakeBot()
+        ps = _mod._ps(bot)
+        assert ps["trigger"] == ""
+
+    def test_trigger_buffers_without_listen(self):
+        """_on_voice buffers when trigger is set, even with listen=False."""
+        bot = _FakeBot()
+        bot.config = {"voice": {"trigger": "claude"}}
+        ps = _mod._ps(bot)
+        assert ps["listen"] is False
+        user = {"name": "Alice"}
+        chunk = _FakeSoundChunk(b"\x01\x02" * 480)
+        _mod._on_voice(bot, user, chunk)
+        assert "Alice" in ps["buffers"]
+        assert len(ps["buffers"]["Alice"]) == 960
+
+    def test_trigger_detected_spawns_tts(self):
+        """Flush monitor detects trigger word and spawns TTS."""
+        bot = _FakeBot()
+        bot.config = {"voice": {"trigger": "claude"}}
+        ps = _mod._ps(bot)
+        ps["silence_gap"] = 0.1
+
+        pcm = b"\x00\x01" * (_mod._MIN_BYTES // 2 + 100)
+        with ps["lock"]:
+            ps["buffers"]["Alice"] = bytearray(pcm)
+            ps["last_ts"]["Alice"] = time.monotonic() - 1.0
+
+        spawned = []
+
+        async def _check():
+            tts_hit = asyncio.Event()
+
+            def track_spawn(coro, *, name=None):
+                spawned.append(name)
+                if name == "voice-tts":
+                    tts_hit.set()
+                coro.close()
+                task = MagicMock()
+                task.done.return_value = False
+                return task
+
+            bot._spawn = track_spawn
+
+            with patch.object(_mod, "_transcribe",
+                              return_value="claude hello world"):
+                task = asyncio.create_task(_mod._flush_monitor(bot))
+                await asyncio.wait_for(tts_hit.wait(), timeout=5)
+                ps["trigger"] = ""
+                await asyncio.sleep(0.1)
+                try:
+                    await asyncio.wait_for(task, timeout=2)
+                except (asyncio.CancelledError, asyncio.TimeoutError):
+                    pass
+            assert "voice-tts" in spawned
+
+        asyncio.run(_check())
+
+    def test_trigger_strips_word(self):
+        """Trigger word is stripped; only remainder goes to TTS."""
+        bot = _FakeBot()
+        bot.config = {"voice": {"trigger": "claude"}}
+        ps = _mod._ps(bot)
+        ps["silence_gap"] = 0.1
+
+        pcm = b"\x00\x01" * (_mod._MIN_BYTES // 2 + 100)
+        with ps["lock"]:
+            ps["buffers"]["Alice"] = bytearray(pcm)
+            ps["last_ts"]["Alice"] = time.monotonic() - 1.0
+
+        tts_texts = []
+
+        async def _check():
+            tts_hit = asyncio.Event()
+
+            async def _noop():
+                pass
+
+            def capturing_tts(bot_, text):
+                tts_texts.append(text)
+                return _noop()
+
+            def track_spawn(coro, *, name=None):
+                if name == "voice-tts":
+                    tts_hit.set()
+                coro.close()
+                task = MagicMock()
+                task.done.return_value = False
+                return task
+
+            bot._spawn = track_spawn
+            original_tts = _mod._tts_play
+            _mod._tts_play = capturing_tts
+
+            try:
+                with patch.object(_mod, "_transcribe",
+                                  return_value="Claude hello world"):
+                    task = asyncio.create_task(_mod._flush_monitor(bot))
+                    await asyncio.wait_for(tts_hit.wait(), timeout=5)
+                    ps["trigger"] = ""
+                    await asyncio.sleep(0.1)
+                    try:
+                        await asyncio.wait_for(task, timeout=2)
+                    except (asyncio.CancelledError, asyncio.TimeoutError):
+                        pass
+            finally:
+                _mod._tts_play = original_tts
+            assert tts_texts == ["hello world"]
+
+        asyncio.run(_check())
+
+    def test_no_trigger_discards(self):
+        """Non-triggered speech is silently discarded when only trigger active."""
+        bot = _FakeBot()
+        bot.config = {"voice": {"trigger": "claude"}}
+        ps = _mod._ps(bot)
+        ps["silence_gap"] = 0.1
+
+        pcm = b"\x00\x01" * (_mod._MIN_BYTES // 2 + 100)
+        with ps["lock"]:
+            ps["buffers"]["Alice"] = bytearray(pcm)
+            ps["last_ts"]["Alice"] = time.monotonic() - 1.0
+
+        async def _check():
+            transcribed = asyncio.Event()
+            loop = asyncio.get_running_loop()
+
+            def mock_transcribe(ps_, pcm_):
+                loop.call_soon_threadsafe(transcribed.set)
+                return "hello world"
+
+            with patch.object(_mod, "_transcribe",
+                              side_effect=mock_transcribe):
+                task = asyncio.create_task(_mod._flush_monitor(bot))
+                await asyncio.wait_for(transcribed.wait(), timeout=5)
+                # Give the monitor a moment to process the result
+                await asyncio.sleep(0.2)
+                ps["trigger"] = ""
+                await asyncio.sleep(0.1)
+                try:
+                    await asyncio.wait_for(task, timeout=2)
+                except (asyncio.CancelledError, asyncio.TimeoutError):
+                    pass
+            assert bot.actions == []
+
+        asyncio.run(_check())
+
+    def test_on_connected_starts_with_trigger(self):
+        """Listener and flush task start on connect when trigger is set."""
+        bot = _FakeBot()
+        bot.config = {"voice": {"trigger": "claude"}}
+        ps = _mod._ps(bot)
+
+        spawned = []
+
+        def fake_spawn(coro, *, name=None):
+            task = MagicMock()
+            task.done.return_value = False
+            spawned.append(name)
+            coro.close()
+            return task
+
+        bot._spawn = fake_spawn
+        asyncio.run(_mod.on_connected(bot))
+        assert ps["_listener_registered"] is True
+        assert "voice-flush-monitor" in spawned
+
+    def test_listen_status_shows_trigger(self):
+        """!listen status includes trigger info when set."""
+        bot = _FakeBot()
+        bot.config = {"voice": {"trigger": "claude"}}
+        _mod._ps(bot)
+        msg = _Msg(text="!listen")
+        asyncio.run(_mod.cmd_listen(bot, msg))
+        assert any("Trigger: claude" in r for r in bot.replied)