feat: ack tone, duck-before-TTS, instant ducking on voice/unmute
- Add an ascending two-tone chime (880Hz/1320Hz) before TTS playback as an audible acknowledgment that the voice trigger was recognized
- Signal music ducking 1.5s before TTS starts so music is already lowered when audio begins playing
- Snap duck volume to the floor instantly on a voice packet or user unmute via pymumble callback, eliminating the 1s poll delay
- Register a USERUPDATED callback to preemptively duck when a user unmutes (they're about to speak)
- Strip leading punctuation from the trigger remainder (Whisper artifacts)
This commit is contained in:
@@ -11,6 +11,8 @@ import asyncio
|
|||||||
import io
|
import io
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
|
import math
|
||||||
|
import struct
|
||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
import urllib.request
|
import urllib.request
|
||||||
@@ -83,6 +85,50 @@ def _pcm_to_wav(pcm: bytes) -> bytes:
|
|||||||
return buf.getvalue()
|
return buf.getvalue()
|
||||||
|
|
||||||
|
|
||||||
|
# -- Acknowledge tone --------------------------------------------------------

_ACK_FREQ = (880, 1320)    # A5 -> E6 ascending
_ACK_NOTE_DUR = 0.15       # seconds per note
_ACK_AMP = 12000           # gentle amplitude (peak value of a signed 16-bit sample)
_ACK_FRAME = 960           # samples per add_sound() call; 20ms at 48kHz, matches Mumble native


async def _ack_tone(bot) -> None:
    """Play a short two-tone ascending chime via pymumble sound_output.

    Returns immediately when the bot has no ``_mumble`` client or the
    client has no ``sound_output``.  If the bot is self-muted, any pending
    ``_mute_task`` is cancelled and the bot is unmuted first.  The chime is
    then queued frame by frame, and the coroutine returns once
    ``get_buffer_size()`` reports an empty buffer.

    Each note lasts ``_ACK_NOTE_DUR`` seconds.  The sample count is derived
    from the duration and ``_SAMPLE_RATE`` rather than from a whole number
    of frames, because 0.15s is 7.5 frames of 20ms and truncating to 7
    frames shortened every note and cut the sine off mid-cycle.
    """
    mu = getattr(bot, "_mumble", None)
    if mu is None:
        return
    so = mu.sound_output
    if so is None:
        return

    # Unmute if self-muted (stream_audio handles re-mute later)
    if getattr(bot, "_self_mute", False):
        if bot._mute_task and not bot._mute_task.done():
            bot._mute_task.cancel()
            bot._mute_task = None
        try:
            mu.users.myself.unmute()
        except Exception:
            # Best effort: a failed unmute must not block the TTS that
            # follows, but leave a trace instead of swallowing it silently.
            log.debug("voice: unmute before ack tone failed", exc_info=True)

    # Render the whole chime up front.  The phase restarts at zero for
    # every note, exactly as when each note was generated on its own.
    # NOTE(review): _ACK_FRAME assumes _SAMPLE_RATE is 48000 -- confirm.
    note_len = round(_ACK_NOTE_DUR * _SAMPLE_RATE)
    samples = [
        int(_ACK_AMP * math.sin(2 * math.pi * freq * (n / _SAMPLE_RATE)))
        for freq in _ACK_FREQ
        for n in range(note_len)
    ]
    pcm = struct.pack(f"<{len(samples)}h", *samples)

    # Pad with silence so the final slice is a full frame as well.
    frame_bytes = _ACK_FRAME * 2  # "h" packs two bytes per sample
    tail = len(pcm) % frame_bytes
    if tail:
        pcm += bytes(frame_bytes - tail)

    for start in range(0, len(pcm), frame_bytes):
        so.add_sound(pcm[start:start + frame_bytes])
        # Yield to the event loop while the output buffer is well ahead.
        while so.get_buffer_size() > 0.5:
            await asyncio.sleep(0.02)

    # Wait for tone to finish
    while so.get_buffer_size() > 0:
        await asyncio.sleep(0.05)
|
||||||
|
|
||||||
|
|
||||||
# -- STT: Sound listener (pymumble thread) ----------------------------------
|
# -- STT: Sound listener (pymumble thread) ----------------------------------
|
||||||
|
|
||||||
|
|
||||||
@@ -170,7 +216,7 @@ async def _flush_monitor(bot):
|
|||||||
|
|
||||||
trigger = ps["trigger"]
|
trigger = ps["trigger"]
|
||||||
if trigger and text.lower().startswith(trigger.lower()):
|
if trigger and text.lower().startswith(trigger.lower()):
|
||||||
remainder = text[len(trigger):].strip()
|
remainder = text[len(trigger):].strip().lstrip(",.;:!?")
|
||||||
if remainder:
|
if remainder:
|
||||||
log.info("voice: trigger from %s: %s", name, remainder)
|
log.info("voice: trigger from %s: %s", name, remainder)
|
||||||
bot._spawn(
|
bot._spawn(
|
||||||
@@ -243,8 +289,10 @@ async def _tts_play(bot, text: str):
|
|||||||
if wav_path is None:
|
if wav_path is None:
|
||||||
return
|
return
|
||||||
try:
|
try:
|
||||||
# Signal music plugin to duck while TTS is playing
|
# Signal music plugin to duck, wait for it to take effect
|
||||||
bot.registry._tts_active = True
|
bot.registry._tts_active = True
|
||||||
|
await asyncio.sleep(1.5)
|
||||||
|
await _ack_tone(bot)
|
||||||
done = asyncio.Event()
|
done = asyncio.Event()
|
||||||
await bot.stream_audio(str(wav_path), volume=1.0, on_done=done)
|
await bot.stream_audio(str(wav_path), volume=1.0, on_done=done)
|
||||||
await done.wait()
|
await done.wait()
|
||||||
|
|||||||
@@ -20,6 +20,7 @@ from pymumble_py3.constants import (
|
|||||||
PYMUMBLE_CLBK_DISCONNECTED,
|
PYMUMBLE_CLBK_DISCONNECTED,
|
||||||
PYMUMBLE_CLBK_SOUNDRECEIVED,
|
PYMUMBLE_CLBK_SOUNDRECEIVED,
|
||||||
PYMUMBLE_CLBK_TEXTMESSAGERECEIVED,
|
PYMUMBLE_CLBK_TEXTMESSAGERECEIVED,
|
||||||
|
PYMUMBLE_CLBK_USERUPDATED,
|
||||||
)
|
)
|
||||||
|
|
||||||
from derp.bot import _TokenBucket
|
from derp.bot import _TokenBucket
|
||||||
@@ -217,6 +218,10 @@ class MumbleBot:
|
|||||||
PYMUMBLE_CLBK_SOUNDRECEIVED,
|
PYMUMBLE_CLBK_SOUNDRECEIVED,
|
||||||
self._on_sound_received,
|
self._on_sound_received,
|
||||||
)
|
)
|
||||||
|
self._mumble.callbacks.set_callback(
|
||||||
|
PYMUMBLE_CLBK_USERUPDATED,
|
||||||
|
self._on_user_updated,
|
||||||
|
)
|
||||||
self._mumble.set_receive_sound(self._receive_sound)
|
self._mumble.set_receive_sound(self._receive_sound)
|
||||||
self._mumble.start()
|
self._mumble.start()
|
||||||
self._mumble.is_ready()
|
self._mumble.is_ready()
|
||||||
@@ -287,6 +292,18 @@ class MumbleBot:
|
|||||||
log.warning("mumble: disconnected")
|
log.warning("mumble: disconnected")
|
||||||
self._last_voice_ts = 0.0
|
self._last_voice_ts = 0.0
|
||||||
|
|
||||||
|
def _instant_duck(self) -> None:
|
||||||
|
"""Snap music volume to duck floor immediately.
|
||||||
|
|
||||||
|
Called from pymumble thread on voice/unmute events so ducking
|
||||||
|
takes effect on the next audio frame (~20ms) instead of waiting
|
||||||
|
for the 1s duck monitor poll.
|
||||||
|
"""
|
||||||
|
for peer in getattr(self.registry, "_bots", {}).values():
|
||||||
|
ps = getattr(peer, "_pstate", {}).get("music")
|
||||||
|
if ps and ps.get("duck_enabled") and ps.get("task"):
|
||||||
|
ps["duck_vol"] = float(ps["duck_floor"])
|
||||||
|
|
||||||
def _on_sound_received(self, user, sound_chunk) -> None:
|
def _on_sound_received(self, user, sound_chunk) -> None:
|
||||||
"""Callback from pymumble thread: voice audio received.
|
"""Callback from pymumble thread: voice audio received.
|
||||||
|
|
||||||
@@ -302,12 +319,33 @@ class MumbleBot:
|
|||||||
self.registry._voice_ts = self._last_voice_ts
|
self.registry._voice_ts = self._last_voice_ts
|
||||||
if prev == 0.0:
|
if prev == 0.0:
|
||||||
log.info("mumble: first voice packet from %s", name or "?")
|
log.info("mumble: first voice packet from %s", name or "?")
|
||||||
|
self._instant_duck()
|
||||||
for fn in self._sound_listeners:
|
for fn in self._sound_listeners:
|
||||||
try:
|
try:
|
||||||
fn(user, sound_chunk)
|
fn(user, sound_chunk)
|
||||||
except Exception:
|
except Exception:
|
||||||
log.exception("mumble: sound listener error")
|
log.exception("mumble: sound listener error")
|
||||||
|
|
||||||
|
def _on_user_updated(self, user, actions) -> None:
|
||||||
|
"""Callback from pymumble thread: user state changed.
|
||||||
|
|
||||||
|
When a non-bot user unmutes, update ``_voice_ts`` and snap duck
|
||||||
|
volume to floor immediately.
|
||||||
|
"""
|
||||||
|
if "self_mute" not in actions:
|
||||||
|
return
|
||||||
|
name = user["name"] if isinstance(user, dict) else None
|
||||||
|
bots = getattr(self.registry, "_bots", {})
|
||||||
|
if name and name in bots:
|
||||||
|
return
|
||||||
|
# Only care about unmute (self_mute going False)
|
||||||
|
if user.get("self_mute", True):
|
||||||
|
return
|
||||||
|
log.info("mumble: %s unmuted, preemptive duck", name or "?")
|
||||||
|
self._last_voice_ts = time.monotonic()
|
||||||
|
self.registry._voice_ts = self._last_voice_ts
|
||||||
|
self._instant_duck()
|
||||||
|
|
||||||
def _on_text_message(self, message) -> None:
|
def _on_text_message(self, message) -> None:
|
||||||
"""Callback from pymumble thread: text message received.
|
"""Callback from pymumble thread: text message received.
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user