feat: add voice plugin with STT and TTS

Whisper STT: buffers incoming voice PCM per user, transcribes on silence gap via local whisper.cpp endpoint, posts results as actions. Piper TTS: !say fetches WAV from local Piper endpoint and plays via stream_audio(). 37 tests cover buffering, flush logic, transcription, WAV encoding, commands, and lifecycle.
2026-02-22 03:08:02 +01:00
parent 039f060b50
commit 9fbf45f67d
2 changed files with 843 additions and 0 deletions
@@ -0,0 +1,534 @@
+"""Tests for the voice STT/TTS plugin."""
+
+import asyncio
+import importlib.util
+import io
+import sys
+import time
+import wave
+from unittest.mock import AsyncMock, MagicMock, patch
+
+# -- Load plugin module directly ---------------------------------------------
+
+# The plugin is loaded straight from its file and given the module name
+# "voice". The path is relative, so it is resolved against the working
+# directory the tests are started from.
+_spec = importlib.util.spec_from_file_location("voice", "plugins/voice.py")
+_mod = importlib.util.module_from_spec(_spec)
+# Register the module under "voice" before its top-level code is executed.
+sys.modules["voice"] = _mod
+_spec.loader.exec_module(_mod)
+
+
+# -- Fakes -------------------------------------------------------------------
+
+
+class _FakeState:
+    """In-memory namespaced key/value store (namespace -> key -> value)."""
+
+    def __init__(self):
+        self._store: dict[str, dict[str, str]] = {}
+
+    def get(self, ns: str, key: str) -> str | None:
+        """Return the value stored under *ns*/*key*, or None when absent."""
+        bucket = self._store.get(ns)
+        if bucket is None:
+            return None
+        return bucket.get(key)
+
+    def set(self, ns: str, key: str, value: str) -> None:
+        """Store *value* under *ns*/*key*, creating the namespace on demand."""
+        if ns not in self._store:
+            self._store[ns] = {}
+        self._store[ns][key] = value
+
+    def delete(self, ns: str, key: str) -> None:
+        """Drop *ns*/*key* if it exists; a missing entry is not an error."""
+        bucket = self._store.get(ns)
+        if bucket is not None:
+            bucket.pop(key, None)
+
+    def keys(self, ns: str) -> list[str]:
+        """Return the keys held in namespace *ns* (empty list if unknown)."""
+        return list(self._store.get(ns, {}))
+
+
+class _FakeBot:
+    """Minimal bot for voice plugin testing."""
+
+    def __init__(self, *, mumble: bool = True):
+        # Identity, configuration and state containers.
+        self.nick = "derp"
+        self.config: dict = {}
+        self.state = _FakeState()
+        self._pstate: dict = {}
+        # Outgoing traffic is recorded so tests can assert on it.
+        self.sent: list[tuple[str, str]] = []
+        self.replied: list[str] = []
+        self.actions: list[tuple[str, str]] = []
+        # Spawned tasks and registered sound callbacks.
+        self._tasks: set[asyncio.Task] = set()
+        self._sound_listeners: list = []
+        # stream_audio exists only when the fake is built with mumble=True.
+        if mumble:
+            self.stream_audio = AsyncMock()
+
+    async def send(self, target: str, text: str) -> None:
+        """Record the (target, text) pair in ``sent``."""
+        self.sent.append((target, text))
+
+    async def reply(self, message, text: str) -> None:
+        """Record *text* in ``replied``; *message* itself is not stored."""
+        self.replied.append(text)
+
+    async def action(self, target: str, text: str) -> None:
+        """Record the (target, text) pair in ``actions``."""
+        self.actions.append((target, text))
+
+    def _spawn(self, coro, *, name=None):
+        """Schedule *coro* and track it in ``_tasks`` until it is done.
+
+        *name* is accepted but not used by this fake.
+        """
+        fut = asyncio.ensure_future(coro)
+        fut.add_done_callback(self._tasks.discard)
+        self._tasks.add(fut)
+        return fut
+
+
+class _Msg:
+    """Minimal message object."""
+
+    def __init__(self, text="!listen", nick="Alice", target="0",
+                 is_channel=True):
+        # Fixed protocol fields.
+        self.command = "PRIVMSG"
+        self.tags, self.raw = {}, {}
+        # Sender: the prefix is simply the nick.
+        self.nick = self.prefix = nick
+        # Destination and payload; params holds them as [target, text].
+        self.target = target
+        self.is_channel = is_channel
+        self.text = text
+        self.params = [target, text]
+
+
+class _FakeSoundChunk:
+    """Minimal sound chunk exposing its PCM payload as the ``pcm`` attribute."""
+
+    def __init__(self, pcm: bytes = b"\x00\x00" * 960):
+        # Default payload is 1920 zero bytes (960 repetitions of two bytes).
+        self.pcm = pcm
+
+
+# ---------------------------------------------------------------------------
+# TestMumbleGuard
+# ---------------------------------------------------------------------------
+
+
+class TestMumbleGuard:
+    """Mumble detection and the Mumble-only guard on both commands."""
+
+    def test_is_mumble_true(self):
+        assert _mod._is_mumble(_FakeBot(mumble=True)) is True
+
+    def test_is_mumble_false(self):
+        assert _mod._is_mumble(_FakeBot(mumble=False)) is False
+
+    def test_listen_non_mumble(self):
+        plain_bot = _FakeBot(mumble=False)
+        asyncio.run(_mod.cmd_listen(plain_bot, _Msg(text="!listen on")))
+        refusals = [r for r in plain_bot.replied if "Mumble-only" in r]
+        assert refusals
+
+    def test_say_non_mumble(self):
+        plain_bot = _FakeBot(mumble=False)
+        asyncio.run(_mod.cmd_say(plain_bot, _Msg(text="!say hello")))
+        refusals = [r for r in plain_bot.replied if "Mumble-only" in r]
+        assert refusals
+
+
+# ---------------------------------------------------------------------------
+# TestListenCommand
+# ---------------------------------------------------------------------------
+
+
+class TestListenCommand:
+    """Behaviour of the !listen command: status, on, off and bad input."""
+
+    def test_listen_status(self):
+        bot = _FakeBot()
+        asyncio.run(_mod.cmd_listen(bot, _Msg(text="!listen")))
+        mentions_off = [r for r in bot.replied if "off" in r.lower()]
+        assert mentions_off
+
+    def test_listen_on(self):
+        bot = _FakeBot()
+        asyncio.run(_mod.cmd_listen(bot, _Msg(text="!listen on")))
+        assert _mod._ps(bot)["listen"] is True
+        confirmations = [r for r in bot.replied if "Listening" in r]
+        assert confirmations
+
+    def test_listen_off(self):
+        bot = _FakeBot()
+        state = _mod._ps(bot)
+        # Start from an active session that already holds audio for Alice.
+        state["listen"] = True
+        state["buffers"]["Alice"] = bytearray(100)
+        state["last_ts"]["Alice"] = time.monotonic()
+        asyncio.run(_mod.cmd_listen(bot, _Msg(text="!listen off")))
+        assert state["listen"] is False
+        # Turning listening off must also drop everything buffered so far.
+        assert state["buffers"] == {}
+        assert state["last_ts"] == {}
+        confirmations = [r for r in bot.replied if "Stopped" in r]
+        assert confirmations
+
+    def test_listen_invalid(self):
+        bot = _FakeBot()
+        asyncio.run(_mod.cmd_listen(bot, _Msg(text="!listen maybe")))
+        usage_hints = [r for r in bot.replied if "Usage" in r]
+        assert usage_hints
+
+
+# ---------------------------------------------------------------------------
+# TestSayCommand
+# ---------------------------------------------------------------------------
+
+
+class TestSayCommand:
+    """Argument validation and task spawning for the !say command."""
+
+    def test_say_no_text(self):
+        """A bare !say is answered with a usage hint."""
+        bot = _FakeBot()
+        msg = _Msg(text="!say")
+        asyncio.run(_mod.cmd_say(bot, msg))
+        assert any("Usage" in r for r in bot.replied)
+
+    def test_say_too_long(self):
+        """A 501-character text is rejected as too long."""
+        bot = _FakeBot()
+        text = "x" * 501
+        msg = _Msg(text=f"!say {text}")
+        asyncio.run(_mod.cmd_say(bot, msg))
+        assert any("too long" in r.lower() for r in bot.replied)
+
+    def test_say_spawns_task(self):
+        """A valid !say goes through bot._spawn with name "voice-tts"."""
+        bot = _FakeBot()
+        msg = _Msg(text="!say hello world")
+
+        spawned = []
+
+        def track_spawn(coro, *, name=None):
+            spawned.append(name)
+            # The coroutine is never run here; close it explicitly so it is
+            # not left un-awaited.
+            coro.close()
+            task = MagicMock()
+            task.done.return_value = False
+            return task
+
+        bot._spawn = track_spawn
+        asyncio.run(_mod.cmd_say(bot, msg))
+        assert "voice-tts" in spawned
+
+
+# ---------------------------------------------------------------------------
+# TestAudioBuffering
+# ---------------------------------------------------------------------------
+
+
+class TestAudioBuffering:
+    """How ``_on_voice`` accumulates incoming PCM into per-user buffers."""
+
+    def test_accumulates_pcm(self):
+        bot = _FakeBot()
+        state = _mod._ps(bot)
+        state["listen"] = True
+        frame = _FakeSoundChunk(b"\x01\x02" * 480)
+        _mod._on_voice(bot, {"name": "Alice"}, frame)
+        buffers = state["buffers"]
+        assert "Alice" in buffers
+        assert len(buffers["Alice"]) == 960
+
+    def test_ignores_own_nick(self):
+        bot = _FakeBot()
+        state = _mod._ps(bot)
+        state["listen"] = True
+        # "derp" is the fake bot's own nick.
+        frame = _FakeSoundChunk(b"\x01\x02" * 480)
+        _mod._on_voice(bot, {"name": "derp"}, frame)
+        assert "derp" not in state["buffers"]
+
+    def test_respects_listen_false(self):
+        bot = _FakeBot()
+        state = _mod._ps(bot)
+        state["listen"] = False
+        frame = _FakeSoundChunk(b"\x01\x02" * 480)
+        _mod._on_voice(bot, {"name": "Alice"}, frame)
+        assert state["buffers"] == {}
+
+    def test_caps_at_max_bytes(self):
+        bot = _FakeBot()
+        state = _mod._ps(bot)
+        state["listen"] = True
+        # A single chunk that is 200 bytes larger than the cap.
+        oversized = b"\x00\x01" * (_mod._MAX_BYTES // 2 + 100)
+        _mod._on_voice(bot, {"name": "Alice"}, _FakeSoundChunk(oversized))
+        assert len(state["buffers"]["Alice"]) <= _mod._MAX_BYTES
+
+    def test_empty_pcm_ignored(self):
+        bot = _FakeBot()
+        state = _mod._ps(bot)
+        state["listen"] = True
+        _mod._on_voice(bot, {"name": "Alice"}, _FakeSoundChunk(b""))
+        assert "Alice" not in state["buffers"]
+
+    def test_none_user_ignored(self):
+        bot = _FakeBot()
+        state = _mod._ps(bot)
+        state["listen"] = True
+        frame = _FakeSoundChunk(b"\x01\x02" * 480)
+        # The user argument is a plain string here rather than a dict.
+        _mod._on_voice(bot, "not_a_dict", frame)
+        assert state["buffers"] == {}
+
+    def test_updates_timestamp(self):
+        bot = _FakeBot()
+        state = _mod._ps(bot)
+        state["listen"] = True
+        speaker = {"name": "Alice"}
+        frame = _FakeSoundChunk(b"\x01\x02" * 480)
+        _mod._on_voice(bot, speaker, frame)
+        assert "Alice" in state["last_ts"]
+        first_seen = state["last_ts"]["Alice"]
+        _mod._on_voice(bot, speaker, frame)
+        assert first_seen <= state["last_ts"]["Alice"]
+
+
+# ---------------------------------------------------------------------------
+# TestFlushLogic
+# ---------------------------------------------------------------------------
+
+
+class TestFlushLogic:
+    """Silence-gap flushing performed by ``_flush_monitor``."""
+
+    @staticmethod
+    def _silent_bot(pcm):
+        """Return ``(bot, ps)`` listening, with *pcm* buffered for Alice.
+
+        Alice's last-audio timestamp is set one second in the past, which is
+        longer than the 0.1 s silence gap configured here.
+        """
+        bot = _FakeBot()
+        ps = _mod._ps(bot)
+        ps["listen"] = True
+        ps["silence_gap"] = 0.1  # very short for testing
+        with ps["lock"]:
+            ps["buffers"]["Alice"] = bytearray(pcm)
+            ps["last_ts"]["Alice"] = time.monotonic() - 1.0  # already silent
+        return bot, ps
+
+    @staticmethod
+    def _run_monitor(bot, ps, *, transcript, run_for):
+        """Run ``_flush_monitor`` for *run_for* seconds, then stop it.
+
+        ``_transcribe`` is patched to return *transcript* for the whole run.
+        The patch mock is returned so callers can inspect its calls.
+        """
+
+        async def _drive():
+            with patch.object(_mod, "_transcribe",
+                              return_value=transcript) as mock_t:
+                task = asyncio.create_task(_mod._flush_monitor(bot))
+                await asyncio.sleep(run_for)
+                ps["listen"] = False  # stop the monitor
+                await asyncio.sleep(0.2)
+                try:
+                    await asyncio.wait_for(task, timeout=2)
+                except (asyncio.CancelledError, asyncio.TimeoutError):
+                    # Deliberately best effort: the caller's assertions
+                    # decide the outcome, not how the monitor task ended.
+                    pass
+            return mock_t
+
+        return asyncio.run(_drive())
+
+    def test_silence_gap_triggers_flush(self):
+        """Buffer is flushed and transcribed after silence gap."""
+        # Pre-populate buffer with enough PCM (> _MIN_BYTES)
+        pcm = b"\x00\x01" * (_mod._MIN_BYTES // 2 + 100)
+        bot, ps = self._silent_bot(pcm)
+        self._run_monitor(bot, ps, transcript="hello", run_for=1.0)
+        assert any("hello" in a[1] for a in bot.actions)
+
+    def test_min_duration_filter(self):
+        """Short utterances (< _MIN_BYTES) are discarded."""
+        # Buffer too small
+        bot, ps = self._silent_bot(b"\x00\x01" * 10)
+        mock_t = self._run_monitor(bot, ps, transcript="x", run_for=0.5)
+        mock_t.assert_not_called()
+
+    def test_buffer_cleared_after_flush(self):
+        """Buffer and timestamp are removed after flushing."""
+        pcm = b"\x00\x01" * (_mod._MIN_BYTES // 2 + 100)
+        bot, ps = self._silent_bot(pcm)
+        self._run_monitor(bot, ps, transcript="test", run_for=0.5)
+        assert "Alice" not in ps["buffers"]
+        assert "Alice" not in ps["last_ts"]
+
+
+# ---------------------------------------------------------------------------
+# TestPcmToWav
+# ---------------------------------------------------------------------------
+
+
+class TestPcmToWav:
+    """WAV encoding produced by ``_pcm_to_wav``."""
+
+    def test_valid_wav(self):
+        silence = bytes(2 * 48000)  # 48000 zeroed 2-byte samples
+        encoded = _mod._pcm_to_wav(silence)
+        # The container must open with the RIFF magic.
+        assert encoded[:4] == b"RIFF"
+        # Decode it again and check the header fields.
+        with wave.open(io.BytesIO(encoded), "rb") as reader:
+            assert reader.getnchannels() == 1
+            assert reader.getsampwidth() == 2
+            assert reader.getframerate() == 48000
+            assert reader.getnframes() == 48000
+
+    def test_empty_pcm(self):
+        encoded = _mod._pcm_to_wav(b"")
+        with wave.open(io.BytesIO(encoded), "rb") as reader:
+            assert reader.getnframes() == 0
+
+
+# ---------------------------------------------------------------------------
+# TestTranscribe
+# ---------------------------------------------------------------------------
+
+
+class TestTranscribe:
+    """How ``_transcribe`` reads the JSON body returned by ``_urlopen``."""
+
+    @staticmethod
+    def _transcribe_reply(body):
+        """Call ``_transcribe`` with ``_urlopen`` patched to answer *body*."""
+        fake_resp = MagicMock()
+        fake_resp.read.return_value = body
+        settings = {"whisper_url": "http://localhost:8080/inference"}
+        samples = b"\x00\x00" * 4800
+        with patch.object(_mod, "_urlopen", return_value=fake_resp):
+            return _mod._transcribe(settings, samples)
+
+    def test_parse_json_response(self):
+        assert self._transcribe_reply(b'{"text": "hello world"}') == "hello world"
+
+    def test_empty_text(self):
+        assert self._transcribe_reply(b'{"text": ""}') == ""
+
+    def test_missing_text_key(self):
+        assert self._transcribe_reply(b'{"result": "something"}') == ""
+
+
+# ---------------------------------------------------------------------------
+# TestPerBotState
+# ---------------------------------------------------------------------------
+
+
+class TestPerBotState:
+    """Per-bot plugin state returned by ``_ps``."""
+
+    def test_ps_initializes(self):
+        fresh = _mod._ps(_FakeBot())
+        assert fresh["listen"] is False
+        assert fresh["buffers"] == {}
+        assert fresh["last_ts"] == {}
+
+    def test_ps_stable_reference(self):
+        bot = _FakeBot()
+        first_call = _mod._ps(bot)
+        assert _mod._ps(bot) is first_call
+
+    def test_ps_isolated_per_bot(self):
+        touched, untouched = _FakeBot(), _FakeBot()
+        _mod._ps(touched)["listen"] = True
+        assert _mod._ps(untouched)["listen"] is False
+
+    def test_ps_config_override(self):
+        bot = _FakeBot()
+        # Config is set before the state is first created.
+        bot.config = {"voice": {"silence_gap": 3.0}}
+        assert _mod._ps(bot)["silence_gap"] == 3.0
+
+
+# ---------------------------------------------------------------------------
+# TestEnsureListener
+# ---------------------------------------------------------------------------
+
+
+class TestEnsureListener:
+    """Registration of the sound callback by ``_ensure_listener``."""
+
+    def test_registers_callback(self):
+        bot = _FakeBot()
+        _mod._ps(bot)  # init state
+        _mod._ensure_listener(bot)
+        assert len(bot._sound_listeners) == 1
+        assert _mod._ps(bot)["_listener_registered"] is True
+
+    def test_idempotent(self):
+        bot = _FakeBot()
+        _mod._ps(bot)
+        for _ in range(2):
+            _mod._ensure_listener(bot)
+        assert len(bot._sound_listeners) == 1
+
+    def test_no_listener_without_attr(self):
+        bot = _FakeBot()
+        delattr(bot, "_sound_listeners")
+        _mod._ps(bot)
+        # Passing means the call below completed without raising.
+        _mod._ensure_listener(bot)
+
+    def test_callback_calls_on_voice(self):
+        bot = _FakeBot()
+        state = _mod._ps(bot)
+        state["listen"] = True
+        _mod._ensure_listener(bot)
+        registered = bot._sound_listeners[0]
+        registered({"name": "Alice"}, _FakeSoundChunk(b"\x01\x02" * 480))
+        assert "Alice" in state["buffers"]
+
+
+# ---------------------------------------------------------------------------
+# TestOnConnected
+# ---------------------------------------------------------------------------
+
+
+class TestOnConnected:
+    """Behaviour of the ``on_connected`` hook."""
+
+    def test_reregisters_when_listening(self):
+        bot = _FakeBot()
+        state = _mod._ps(bot)
+        state["listen"] = True
+
+        names = []
+
+        def fake_spawn(coro, *, name=None):
+            # Record the name and discard the coroutine without running it.
+            names.append(name)
+            coro.close()
+            pending = MagicMock()
+            pending.done.return_value = False
+            return pending
+
+        bot._spawn = fake_spawn
+        asyncio.run(_mod.on_connected(bot))
+        assert state["_listener_registered"] is True
+        assert "voice-flush-monitor" in names
+
+    def test_noop_when_not_listening(self):
+        bot = _FakeBot()
+        _mod._ps(bot)  # init but listen=False
+
+        names = []
+
+        def fake_spawn(coro, *, name=None):
+            names.append(name)
+            coro.close()
+            return MagicMock()
+
+        bot._spawn = fake_spawn
+        asyncio.run(_mod.on_connected(bot))
+        assert "voice-flush-monitor" not in names
+
+    def test_noop_non_mumble(self):
+        plain_bot = _FakeBot(mumble=False)
+        # Passing means the hook completed without raising.
+        asyncio.run(_mod.on_connected(plain_bot))