feat: add always-on voice trigger mode with TTS echo

When `trigger` is set in the [voice] section of the config, the bot continuously listens to and transcribes voice. If an utterance starts with the trigger word, the trigger word is stripped and the remainder is echoed back via TTS. Speech that does not start with the trigger word is silently discarded unless !listen is also active.
2026-02-22 03:24:03 +01:00
parent 7b9359c152
commit e127f72660
2 changed files with 221 additions and 11 deletions
--- a/tests/test_voice.py
+++ b/tests/test_voice.py
@@ -532,3 +532,195 @@ class TestOnConnected:
        bot = _FakeBot(mumble=False)
        asyncio.run(_mod.on_connected(bot))
        # Should not raise or register anything
+
+
+# ---------------------------------------------------------------------------
+# TestTriggerMode
+# ---------------------------------------------------------------------------
+
+
+class TestTriggerMode:
+    def test_trigger_config(self):
+        """_ps() reads the trigger word from config["voice"]["trigger"]."""
+        bot = _FakeBot()
+        bot.config = {"voice": {"trigger": "claude"}}
+        ps = _mod._ps(bot)
+        assert ps["trigger"] == "claude"
+
+    def test_trigger_default_empty(self):
+        """ps["trigger"] defaults to the empty string (trigger mode disabled)."""
+        bot = _FakeBot()
+        ps = _mod._ps(bot)
+        assert ps["trigger"] == ""
+
+    def test_trigger_buffers_without_listen(self):
+        """With a trigger set, _on_voice buffers audio even if listen is False."""
+        bot = _FakeBot()
+        bot.config = {"voice": {"trigger": "claude"}}
+        ps = _mod._ps(bot)
+        assert ps["listen"] is False
+        user = {"name": "Alice"}
+        chunk = _FakeSoundChunk(b"\x01\x02" * 480)
+        _mod._on_voice(bot, user, chunk)
+        assert "Alice" in ps["buffers"]
+        assert len(ps["buffers"]["Alice"]) == 960
+
+    def test_trigger_detected_spawns_tts(self):
+        """Flush monitor sees the trigger word and spawns a "voice-tts" task."""
+        bot = _FakeBot()
+        bot.config = {"voice": {"trigger": "claude"}}
+        ps = _mod._ps(bot)
+        ps["silence_gap"] = 0.1
+
+        pcm = b"\x00\x01" * (_mod._MIN_BYTES // 2 + 100)
+        with ps["lock"]:
+            ps["buffers"]["Alice"] = bytearray(pcm)
+            ps["last_ts"]["Alice"] = time.monotonic() - 1.0
+
+        spawned = []
+
+        async def _check():
+            tts_hit = asyncio.Event()
+
+            def track_spawn(coro, *, name=None):
+                spawned.append(name)
+                if name == "voice-tts":
+                    tts_hit.set()
+                coro.close()
+                task = MagicMock()
+                task.done.return_value = False
+                return task
+
+            bot._spawn = track_spawn
+
+            with patch.object(_mod, "_transcribe",
+                              return_value="claude hello world"):
+                task = asyncio.create_task(_mod._flush_monitor(bot))
+                await asyncio.wait_for(tts_hit.wait(), timeout=5)
+                ps["trigger"] = ""
+                await asyncio.sleep(0.1)
+                try:
+                    await asyncio.wait_for(task, timeout=2)
+                except (asyncio.CancelledError, asyncio.TimeoutError):
+                    pass
+            assert "voice-tts" in spawned
+
+        asyncio.run(_check())
+
+    def test_trigger_strips_word(self):
+        """Capitalized trigger still matches; only the remainder reaches TTS."""
+        bot = _FakeBot()
+        bot.config = {"voice": {"trigger": "claude"}}
+        ps = _mod._ps(bot)
+        ps["silence_gap"] = 0.1
+
+        pcm = b"\x00\x01" * (_mod._MIN_BYTES // 2 + 100)
+        with ps["lock"]:
+            ps["buffers"]["Alice"] = bytearray(pcm)
+            ps["last_ts"]["Alice"] = time.monotonic() - 1.0
+
+        tts_texts = []
+
+        async def _check():
+            tts_hit = asyncio.Event()
+
+            async def _noop():
+                pass
+
+            def capturing_tts(bot_, text):
+                tts_texts.append(text)
+                return _noop()
+
+            def track_spawn(coro, *, name=None):
+                if name == "voice-tts":
+                    tts_hit.set()
+                coro.close()
+                task = MagicMock()
+                task.done.return_value = False
+                return task
+
+            bot._spawn = track_spawn
+            original_tts = _mod._tts_play
+            _mod._tts_play = capturing_tts
+
+            try:
+                with patch.object(_mod, "_transcribe",
+                                  return_value="Claude hello world"):
+                    task = asyncio.create_task(_mod._flush_monitor(bot))
+                    await asyncio.wait_for(tts_hit.wait(), timeout=5)
+                    ps["trigger"] = ""
+                    await asyncio.sleep(0.1)
+                    try:
+                        await asyncio.wait_for(task, timeout=2)
+                    except (asyncio.CancelledError, asyncio.TimeoutError):
+                        pass
+            finally:
+                _mod._tts_play = original_tts
+            assert tts_texts == ["hello world"]
+
+        asyncio.run(_check())
+
+    def test_no_trigger_discards(self):
+        """Speech lacking the trigger yields no bot actions in trigger-only mode."""
+        bot = _FakeBot()
+        bot.config = {"voice": {"trigger": "claude"}}
+        ps = _mod._ps(bot)
+        ps["silence_gap"] = 0.1
+
+        pcm = b"\x00\x01" * (_mod._MIN_BYTES // 2 + 100)
+        with ps["lock"]:
+            ps["buffers"]["Alice"] = bytearray(pcm)
+            ps["last_ts"]["Alice"] = time.monotonic() - 1.0
+
+        async def _check():
+            transcribed = asyncio.Event()
+            loop = asyncio.get_running_loop()
+
+            def mock_transcribe(ps_, pcm_):
+                loop.call_soon_threadsafe(transcribed.set)
+                return "hello world"
+
+            with patch.object(_mod, "_transcribe",
+                              side_effect=mock_transcribe):
+                task = asyncio.create_task(_mod._flush_monitor(bot))
+                await asyncio.wait_for(transcribed.wait(), timeout=5)
+                # Let the monitor handle the transcript before it is stopped below
+                await asyncio.sleep(0.2)
+                ps["trigger"] = ""
+                await asyncio.sleep(0.1)
+                try:
+                    await asyncio.wait_for(task, timeout=2)
+                except (asyncio.CancelledError, asyncio.TimeoutError):
+                    pass
+            assert bot.actions == []
+
+        asyncio.run(_check())
+
+    def test_on_connected_starts_with_trigger(self):
+        """With a trigger set, on_connected registers listener + flush monitor."""
+        bot = _FakeBot()
+        bot.config = {"voice": {"trigger": "claude"}}
+        ps = _mod._ps(bot)
+
+        spawned = []
+
+        def fake_spawn(coro, *, name=None):
+            task = MagicMock()
+            task.done.return_value = False
+            spawned.append(name)
+            coro.close()
+            return task
+
+        bot._spawn = fake_spawn
+        asyncio.run(_mod.on_connected(bot))
+        assert ps["_listener_registered"] is True
+        assert "voice-flush-monitor" in spawned
+
+    def test_listen_status_shows_trigger(self):
+        """!listen with no argument reports "Trigger: <word>" when one is set."""
+        bot = _FakeBot()
+        bot.config = {"voice": {"trigger": "claude"}}
+        _mod._ps(bot)
+        msg = _Msg(text="!listen")
+        asyncio.run(_mod.cmd_listen(bot, msg))
+        assert any("Trigger: claude" in r for r in bot.replied)