feat: add always-on voice trigger mode with TTS echo

When [voice] trigger is set in config, the bot continuously listens to and
transcribes voice. When an utterance starts with the trigger word, the trigger
is stripped and the remainder is echoed back via TTS. Non-triggered speech is
silently discarded unless !listen is also active.
This commit is contained in:
user
2026-02-22 03:24:03 +01:00
parent 7b9359c152
commit e127f72660
2 changed files with 221 additions and 11 deletions

View File

@@ -47,6 +47,7 @@ def _ps(bot):
cfg = getattr(bot, "config", {}).get("voice", {})
return bot._pstate.setdefault("voice", {
"listen": False,
"trigger": cfg.get("trigger", ""),
"buffers": {}, # {username: bytearray}
"last_ts": {}, # {username: float monotonic}
"flush_task": None,
@@ -83,7 +84,7 @@ def _pcm_to_wav(pcm: bytes) -> bytes:
def _on_voice(bot, user, sound_chunk):
"""Buffer incoming voice PCM per user. Runs on pymumble thread."""
ps = _ps(bot)
if not ps["listen"]:
if not ps["listen"] and not ps["trigger"]:
return
name = user["name"] if isinstance(user, dict) else None
if not name or name == bot.nick:
@@ -133,7 +134,7 @@ async def _flush_monitor(bot):
ps = _ps(bot)
loop = asyncio.get_running_loop()
try:
while ps["listen"]:
while ps["listen"] or ps["trigger"]:
await asyncio.sleep(_FLUSH_INTERVAL)
now = time.monotonic()
to_flush: list[tuple[str, bytes]] = []
@@ -158,8 +159,20 @@ async def _flush_monitor(bot):
continue
if not text or text.strip("., ") == "":
continue
log.info("voice: %s said: %s", name, text)
await bot.action("0", f"heard {name} say: {text}")
trigger = ps["trigger"]
if trigger and text.lower().startswith(trigger.lower()):
remainder = text[len(trigger):].strip()
if remainder:
log.info("voice: trigger from %s: %s", name, remainder)
bot._spawn(
_tts_play(bot, remainder), name="voice-tts",
)
continue
if ps["listen"]:
log.info("voice: %s said: %s", name, text)
await bot.action("0", f"heard {name} say: {text}")
except asyncio.CancelledError:
pass
except Exception:
@@ -256,7 +269,11 @@ async def cmd_listen(bot, message):
parts = message.text.split()
if len(parts) < 2:
state = "on" if ps["listen"] else "off"
await bot.reply(message, f"Listen: {state}")
trigger = ps["trigger"]
info = f"Listen: {state}"
if trigger:
info += f" | Trigger: {trigger}"
await bot.reply(message, info)
return
sub = parts[1].lower()
@@ -267,10 +284,11 @@ async def cmd_listen(bot, message):
await bot.reply(message, "Listening for voice")
elif sub == "off":
ps["listen"] = False
with ps["lock"]:
ps["buffers"].clear()
ps["last_ts"].clear()
_stop_flush_task(bot)
if not ps["trigger"]:
with ps["lock"]:
ps["buffers"].clear()
ps["last_ts"].clear()
_stop_flush_task(bot)
await bot.reply(message, "Stopped listening")
else:
await bot.reply(message, "Usage: !listen [on|off]")
@@ -300,10 +318,10 @@ async def cmd_say(bot, message):
async def on_connected(bot) -> None:
"""Re-register listener after reconnect if listen was on."""
"""Re-register listener after reconnect if listen or trigger is active."""
if not _is_mumble(bot):
return
ps = _ps(bot)
if ps["listen"]:
if ps["listen"] or ps["trigger"]:
_ensure_listener(bot)
_ensure_flush_task(bot)

View File

@@ -532,3 +532,195 @@ class TestOnConnected:
bot = _FakeBot(mumble=False)
asyncio.run(_mod.on_connected(bot))
# Should not raise or register anything
# ---------------------------------------------------------------------------
# TestTriggerMode
# ---------------------------------------------------------------------------
class TestTriggerMode:
    """Tests for the config-driven voice trigger word.

    Helpers prefixed with ``_`` are shared fixtures; pytest only collects the
    ``test_*`` methods.
    """

    # -- shared helpers -----------------------------------------------------

    @staticmethod
    def _trigger_bot(trigger="claude"):
        """Return ``(bot, ps)`` for a fake bot whose config sets *trigger*."""
        bot = _FakeBot()
        bot.config = {"voice": {"trigger": trigger}}
        return bot, _mod._ps(bot)

    @staticmethod
    def _queue_utterance(ps, name="Alice"):
        """Place a pending PCM buffer for *name* into the plugin state.

        The buffer is just over ``_MIN_BYTES`` long and its timestamp is
        backdated by one second against a 0.1 s ``silence_gap``.
        NOTE(review): presumably this makes the flush monitor treat the
        utterance as finished on its first pass -- the flush condition itself
        is not visible from this class.
        """
        ps["silence_gap"] = 0.1
        pcm = b"\x00\x01" * (_mod._MIN_BYTES // 2 + 100)
        with ps["lock"]:
            ps["buffers"][name] = bytearray(pcm)
            ps["last_ts"][name] = time.monotonic() - 1.0

    @staticmethod
    def _fake_task():
        """Return a stand-in task object whose ``done()`` reports False."""
        task = MagicMock()
        task.done.return_value = False
        return task

    @staticmethod
    async def _stop_monitor(ps, task):
        """Clear the trigger so the monitor loop can exit, then await it.

        Cancellation and timeout are tolerated on purpose: the assertions in
        each test are about what happened *before* shutdown.
        """
        ps["trigger"] = ""
        await asyncio.sleep(0.1)
        try:
            await asyncio.wait_for(task, timeout=2)
        except (asyncio.CancelledError, asyncio.TimeoutError):
            pass

    # -- tests --------------------------------------------------------------

    def test_trigger_config(self):
        """_ps() reads trigger from config."""
        _, ps = self._trigger_bot("claude")
        assert ps["trigger"] == "claude"

    def test_trigger_default_empty(self):
        """trigger defaults to empty string (disabled)."""
        bot = _FakeBot()
        ps = _mod._ps(bot)
        assert ps["trigger"] == ""

    def test_trigger_buffers_without_listen(self):
        """_on_voice buffers when trigger is set, even with listen=False."""
        bot, ps = self._trigger_bot()
        assert ps["listen"] is False
        user = {"name": "Alice"}
        chunk = _FakeSoundChunk(b"\x01\x02" * 480)
        _mod._on_voice(bot, user, chunk)
        assert "Alice" in ps["buffers"]
        assert len(ps["buffers"]["Alice"]) == 960

    def test_trigger_detected_spawns_tts(self):
        """Flush monitor detects trigger word and spawns TTS."""
        bot, ps = self._trigger_bot()
        self._queue_utterance(ps)
        spawned = []

        async def _check():
            tts_hit = asyncio.Event()

            def track_spawn(coro, *, name=None):
                spawned.append(name)
                if name == "voice-tts":
                    tts_hit.set()
                # Close instead of scheduling: the coroutine must never run,
                # and closing avoids a "never awaited" warning.
                coro.close()
                return self._fake_task()

            bot._spawn = track_spawn
            with patch.object(_mod, "_transcribe",
                              return_value="claude hello world"):
                task = asyncio.create_task(_mod._flush_monitor(bot))
                await asyncio.wait_for(tts_hit.wait(), timeout=5)
                await self._stop_monitor(ps, task)
            assert "voice-tts" in spawned

        asyncio.run(_check())

    def test_trigger_strips_word(self):
        """Trigger word is stripped; only remainder goes to TTS."""
        bot, ps = self._trigger_bot()
        self._queue_utterance(ps)
        tts_texts = []

        async def _check():
            tts_hit = asyncio.Event()

            async def _noop():
                pass

            def capturing_tts(bot_, text):
                # Record the text handed to TTS; return a real coroutine so
                # the spawn hook below has something to close.
                tts_texts.append(text)
                return _noop()

            def track_spawn(coro, *, name=None):
                if name == "voice-tts":
                    tts_hit.set()
                coro.close()
                return self._fake_task()

            bot._spawn = track_spawn
            # patch.object restores _tts_play on exit, even if the body raises.
            with patch.object(_mod, "_tts_play", capturing_tts):
                # Capitalised on purpose: the match must be case-insensitive.
                with patch.object(_mod, "_transcribe",
                                  return_value="Claude hello world"):
                    task = asyncio.create_task(_mod._flush_monitor(bot))
                    await asyncio.wait_for(tts_hit.wait(), timeout=5)
                    await self._stop_monitor(ps, task)
            assert tts_texts == ["hello world"]

        asyncio.run(_check())

    def test_no_trigger_discards(self):
        """Non-triggered speech is silently discarded when only trigger active."""
        bot, ps = self._trigger_bot()
        self._queue_utterance(ps)

        async def _check():
            transcribed = asyncio.Event()
            loop = asyncio.get_running_loop()

            def mock_transcribe(ps_, pcm_):
                # Signal through the loop so this is safe if the call happens
                # on a different thread than the event loop.
                loop.call_soon_threadsafe(transcribed.set)
                return "hello world"

            with patch.object(_mod, "_transcribe",
                              side_effect=mock_transcribe):
                task = asyncio.create_task(_mod._flush_monitor(bot))
                await asyncio.wait_for(transcribed.wait(), timeout=5)
                # Give the monitor a moment to process the result
                await asyncio.sleep(0.2)
                await self._stop_monitor(ps, task)
            assert bot.actions == []

        asyncio.run(_check())

    def test_on_connected_starts_with_trigger(self):
        """Listener and flush task start on connect when trigger is set."""
        bot, ps = self._trigger_bot()
        spawned = []

        def fake_spawn(coro, *, name=None):
            task = self._fake_task()
            spawned.append(name)
            coro.close()
            return task

        bot._spawn = fake_spawn
        asyncio.run(_mod.on_connected(bot))
        assert ps["_listener_registered"] is True
        assert "voice-flush-monitor" in spawned

    def test_listen_status_shows_trigger(self):
        """!listen status includes trigger info when set."""
        bot, _ = self._trigger_bot()
        msg = _Msg(text="!listen")
        asyncio.run(_mod.cmd_listen(bot, msg))
        assert any("Trigger: claude" in r for r in bot.replied)