diff --git a/ROADMAP.md b/ROADMAP.md index a07984d..971d144 100644 --- a/ROADMAP.md +++ b/ROADMAP.md @@ -178,3 +178,25 @@ - [x] Autoplay shuffled kept tracks on reconnect (silence detection) - [x] Alias plugin (!alias add/del/list) - [x] Container management tools (tools/build, start, stop, restart, nuke, logs, status) + +## v2.4.0 -- Music Discovery + Performance + +- [ ] Last.fm integration (artist.getSimilar, artist.getTopTags, track.getSimilar) +- [ ] `!similar` command (find similar artists, optionally queue via YouTube) +- [ ] `!tags` command (genre/style tags for current track) +- [x] Pause/unpause (`!pause` toggle, position tracking, stale re-download) +- [x] Autoplay continuous radio (random kept, silence-aware, cooldown between tracks) +- [x] Periodic resume persistence (10s interval, survives hard kills) +- [x] Track duration in `!np` (elapsed/total via ffprobe) +- [x] `!announce` toggle (optional track announcements) +- [x] Direct bot addressing (`merlin: say `, TTS via voice peer) +- [x] Self-deafen on connect +- [x] Fade-out click fix (conditional buffer clear, post-fade drain) +- [x] cProfile analysis tool (`tools/profile`) +- [x] Mute detection: skip duck silence when all users muted +- [x] Autoplay shuffle deck (no repeats until full cycle) +- [x] Seek clamp to track duration (prevent seek-past-end stall) +- [x] Iterative `_extract_videos` (replace 51K-deep recursion with stack) +- [x] Bypass SOCKS5 for local SearXNG (`proxy=False`) +- [x] Connection pool: `preload_content=True` for SOCKS connection reuse +- [x] Pool tuning: 30 pools / 8 connections (up from 20/4) diff --git a/TASKS.md b/TASKS.md index 26b4f6b..57b61fe 100644 --- a/TASKS.md +++ b/TASKS.md @@ -1,6 +1,28 @@ # derp - Tasks -## Current Sprint -- v2.3.0 Mumble Voice + Multi-Bot (2026-02-22) +## Current Sprint -- Performance: HTTP + Parsing (2026-02-22) + +| Pri | Status | Task | +|-----|--------|------| +| P0 | [x] | Rewrite `_extract_videos` as iterative stack-based (51K recursive 
calls from 4 invocations) | +| P0 | [x] | `plugins/searx.py` -- route through `derp.http.urlopen(proxy=False)` | +| P1 | [x] | Connection pool: `preload_content=True` + `_PooledResponse` wrapper for connection reuse | +| P1 | [x] | Pool tuning: `num_pools=30, maxsize=8` (was 20/4) | +| P2 | [ ] | Audit remaining plugins for unnecessary proxy routing | + +## Previous Sprint -- Music Discovery via Last.fm (2026-02-22) + +| Pri | Status | Task | +|-----|--------|------| +| P0 | [x] | `plugins/lastfm.py` -- Last.fm API client (artist.getSimilar, artist.getTopTags, track.getSimilar) | +| P0 | [x] | `!similar` command -- show similar artists for current or named track/artist | +| P0 | [x] | `!similar play` -- queue a similar track via YouTube search | +| P1 | [x] | `!tags` command -- show genre/style tags for current or named track | +| P1 | [x] | Config: `[lastfm] api_key` or `LASTFM_API_KEY` env var | +| P2 | [ ] | Tests: `test_lastfm.py` (API response mocking, command dispatch) | +| P2 | [ ] | Documentation update (USAGE.md, CHEATSHEET.md) | + +## Previous Sprint -- v2.3.0 Mumble Voice + Multi-Bot (2026-02-22) | Pri | Status | Task | |-----|--------|------| diff --git a/TODO.md b/TODO.md index 48d75be..a059962 100644 --- a/TODO.md +++ b/TODO.md @@ -130,6 +130,17 @@ is preserved in git history for reference. 
- [ ] SASL authentication - [ ] TLS/STARTTLS connection +## Performance + +- [ ] Iterative `_extract_videos` in alert.py (51K recursive calls, 6.7s CPU) +- [ ] Bypass SOCKS5 for local services (FlaskPaste, SearXNG) +- [ ] Connection pool tuning (529 SOCKS connections per 25min session) +- [ ] Async HTTP client (aiohttp + aiohttp-socks) to avoid blocking executors +- [x] Connection pooling via urllib3 SOCKSProxyManager +- [x] Batch OG fetch via ThreadPoolExecutor +- [x] HTTP opener caching at module level +- [x] Per-backend error tracking with exponential backoff + ## Mumble - [x] Mumble adapter via TCP/TLS + protobuf control channel (no SDK) @@ -144,9 +155,23 @@ is preserved in git history for reference. - [x] Configurable voice profiles (voice, FX chain) - [x] Self-mute support (auto mute/unmute around audio) - [x] Bot audio isolation (ignore own bots in sound callback) +- [x] Pause/unpause with position tracking, stale stream re-download, rewind + fade-in +- [x] Autoplay continuous radio (random kept track, silence-aware, configurable cooldown) +- [x] Periodic resume state persistence (survives hard kills) +- [x] Track duration in `!np` (ffprobe), optional `!announce` toggle +- [x] Direct bot addressing (`merlin: say `) +- [x] Self-deafen on connect - [ ] Per-channel voice settings (different voice per channel) - [ ] Voice activity log (who spoke, duration, transcript) +## Music Discovery + +- [ ] Last.fm integration (API key, free tier) +- [ ] `!similar` command -- find similar artists/tracks via Last.fm +- [ ] `!tags` command -- show genre/style tags for current track +- [ ] Auto-queue similar tracks when autoplay has no kept tracks +- [ ] MusicBrainz fallback (no API key, 1 req/sec rate limit) + ## Slack - [ ] Slack adapter via Socket Mode WebSocket (no SDK) diff --git a/docs/AUDIO.md b/docs/AUDIO.md new file mode 100644 index 0000000..b0faeb0 --- /dev/null +++ b/docs/AUDIO.md @@ -0,0 +1,333 @@ +# Audio Engine -- Issues, Fixes, and Consolidation Notes + 
+Technical reference for the Mumble audio pipeline: known issues, +applied fixes, architectural decisions, and areas for future work. + +## Architecture Overview + +``` +yt-dlp -> ffmpeg (decode to s16le 48kHz mono) -> PCM frames (20ms) + -> volume ramp/scale -> pymumble sound_output -> Opus encode -> Mumble +``` + +Key components: + +| File | Role | +|------|------| +| `src/derp/mumble.py` | `stream_audio()` -- PCM feed loop, volume ramp, seek | +| `plugins/music.py` | Queue, play loop, fade orchestration, duck monitor | + +### Volume control layers (evaluated per-frame, highest priority first) + +1. **fade_vol** -- active during fade-out (skip/stop/pause); set to 0 as target +2. **duck_vol** -- voice-activated ducking; snap to floor, linear restore +3. **volume** -- user-set level (0-100) + +The play loop passes a lambda to `stream_audio`: + +```python +volume=lambda: ( + ps["fade_vol"] if ps["fade_vol"] is not None else + ps["duck_vol"] if ps["duck_vol"] is not None else + ps["volume"] +) / 100.0 +``` + +### Per-frame volume ramping + +`stream_audio` never jumps to the target volume. Each 20ms frame is +ramped from `_cur_vol` toward `target` by at most `step`: + +- **_max_step** = 0.005 (~4s full ramp) -- ceiling for normal changes +- **fade_in_step** -- computed from fade-in duration (default 5s) +- **fade_step** -- override from plugin (fade-out on skip/stop/pause) + +When `abs(diff) < 0.0001`, flat scaling is used (avoids ramp artifacts +on steady-state frames). Otherwise, `_scale_pcm_ramp()` linearly +interpolates across all 960 samples in the frame. + +--- + +## Issues and Fixes + +### 1. Alpine ffmpeg lacks librubberband + +**Symptom:** 13/15 voice audition samples failed. `rubberband` audio +filter unavailable in ffmpeg. + +**Root cause:** Alpine's ffmpeg package is compiled without +`--enable-librubberband`. + +**Fix:** Added `rubberband` CLI package to `Containerfile`. 
Created +`_split_fx()` in `plugins/voice.py` to parse FX chains: pitch-shifting +goes through the `rubberband` CLI binary, remaining filters (bass, echo) +through ffmpeg. Two-stage pipeline. + +**Files:** `Containerfile`, `plugins/voice.py` + +--- + +### 2. Self-ducking between bots + +**Symptom:** derp's music volume dropped when merlin spoke (TTS). + +**Root cause:** merlin's TTS output triggered `_on_sound_received`, +which updated the shared `registry._voice_ts` timestamp. derp's duck +monitor saw recent voice activity and ducked. + +**Fix:** `_on_sound_received` checks `registry._bots` and returns early +for any bot username -- no timestamp update, no listener dispatch. + +```python +def _on_sound_received(self, user, sound_chunk) -> None: + name = user["name"] if isinstance(user, dict) else None + bots = getattr(self.registry, "_bots", {}) + if name and name in bots: + return # ignore audio from bots entirely +``` + +**Files:** `src/derp/mumble.py` + +--- + +### 3. Click/pop on skip/stop (fade-out cancellation) + +**Symptom:** Audible glitch at the end of fade-out when skipping or +stopping a track. + +**Root cause:** `_fade_and_cancel()` fades volume to 0 over ~3s, then +calls `task.cancel()`. In `stream_audio`, `CancelledError` triggers +`clear_buffer()`, which drops any frames still queued in pymumble's +output -- including frames that were encoded at non-zero amplitude a +few frames earlier. The sudden buffer wipe produces a click. + +**Fix (two-part):** + +1. **Plugin side** (`music.py`): Added 150ms post-fade drain before + cancel, giving pymumble time to flush remaining silent frames. + +2. **Engine side** (`mumble.py`): `CancelledError` handler only calls + `clear_buffer()` if `_cur_vol > 0.01`. When a fade-out has already + driven volume to ~0, the remaining buffer frames are silent and + clearing them is unnecessary. 
+ +```python +# mumble.py -- CancelledError handler +if _cur_vol > 0.01: + self._mumble.sound_output.clear_buffer() +``` + +```python +# music.py -- _fade_and_cancel() +await asyncio.sleep(duration) +await asyncio.sleep(0.15) # drain window +task.cancel() +``` + +**Files:** `src/derp/mumble.py`, `plugins/music.py` + +--- + +### 4. Fade-out math + +**How it works:** `_fade_and_cancel(duration=3.0)` computes the +per-frame step from the current effective volume: + +```python +cur_vol = (duck_vol or volume) / 100.0 +n_frames = duration / 0.02 # 150 frames for 3s +step = cur_vol / n_frames +``` + +The play loop sets `ps["fade_vol"] = 0` (the target) and +`ps["fade_step"] = step` (the rate). `stream_audio` ramps `_cur_vol` +toward 0 at `step` per frame. At 50% volume: step = 0.0033, reaching +zero in exactly 150 frames (3.0s). + +**Note:** `fade_vol` is set to 0 immediately, making the volume lambda +return 0 as the target. The ramp code smoothly transitions -- there is +no abrupt jump because `_cur_vol` tracks actual output level, not the +target. + +--- + +### 5. Self-mute lifecycle + +**Requirement:** merlin mutes on connect, unmutes only when emitting +audio (TTS), re-mutes after a delay. + +**Implementation:** + +``` +connect -> mute() +stream_audio start -> cancel pending mute task, unmute() +stream_audio finally -> spawn _delayed_mute(3.0) +``` + +The 3-second delay prevents rapid mute/unmute flicker on back-to-back +TTS. The mute task is cancelled if new audio starts before it fires. + +**Config:** `self_mute = true` in `[[mumble.extra]]` + +**Files:** `src/derp/mumble.py` + +--- + +### 6. Self-deafen on connect + +**Requirement:** merlin deafens on connect (no audio reception needed). + +**Implementation:** `self_deaf = true` config flag, calls +`self._mumble.users.myself.deafen()` in `_on_connected`. 
+ +**Files:** `src/derp/mumble.py`, `config/derp.toml` + +--- + +## Pause/Resume + +### Design + +`!pause` toggles between paused and playing states: + +**Pause:** Captures current track + elapsed position + monotonic +timestamp. Fades out, cancels play loop. Queue is preserved. + +**Unpause:** Re-inserts track at queue front, starts play loop with +seek. Three special behaviors: + +1. **Rewind:** 3s rewind on unpause for continuity (only if paused >= 3s, + as an anti-flood guard: rapid toggling doesn't compound the rewind). + +2. **Stale stream:** If paused > 45s, cached stream files (in + `data/music/cache/`) are deleted so the play loop re-downloads. + Kept files (`data/music/`) are never deleted. Stream URLs from + YouTube et al. expire within minutes. + +3. **Fade-in:** Unpause always uses `fade_in=True` (5s ramp from 0). + +**State cleanup:** `!stop` clears `ps["paused"]`. The play loop's +`finally` block skips `_cleanup_track` when paused (preserves the file). + +--- + +## Autoplay + +### Design + +When `autoplay = true` (config), the play loop stays alive after the +queue empties: + +1. Waits for silence (duck_silence threshold, default 15s) +2. Picks one random kept track +3. Plays it +4. On completion, loops back to step 1 + +This replaces the previous bulk-queue approach (shuffle all kept tracks +at once). Benefits: no large upfront queue, silence-aware gaps between +tracks, indefinite looping. + +### Resume persistence + +A background task saves track URL + elapsed position to the state DB +every 10 seconds during playback: + +```python +async def _periodic_save(): + while True: + await asyncio.sleep(10) + el = cur_seek + progress[0] * 0.02 + if el > 1.0: + _save_resume(bot, track, el) +``` + +On hard kill: resumes from at most ~10s behind. On normal track +completion: `_clear_resume()` wipes the state. 
+ +--- + +## Voice Ducking + +### Flow + +``` +voice detected -> duck_vol = floor (instant) +silence > duck_silence -> linear restore over duck_restore seconds +``` + +The duck monitor runs as a background task alongside the play loop. +It updates `ps["duck_vol"]` which the volume lambda reads per-frame. + +### Restore ramp + +Restoration is linear from floor to user volume. The per-frame ramp in +`stream_audio` further smooths each 1-second update from the monitor, +eliminating audible steps. + +### Bot audio isolation + +Bot usernames (from `registry._bots`) are excluded from +`_on_sound_received` entirely -- no timestamp update, no listener +dispatch. This prevents self-ducking between derp and merlin. + +--- + +## Seek (in-stream pipeline swap) + +### Design + +Seek rebuilds the ffmpeg pipeline at the new position without cancelling +the play loop task. This avoids the overhead of re-downloading. + +1. Set `_seek_fading = True`, `_seek_fade_out = 10` (0.2s ramp-down) +2. Continue reading frames, scaling by decreasing ratio +3. At fade-out = 0: kill ffmpeg, clear buffer, spawn new pipeline +4. 0.5s fade-in on the new pipeline + +### Consolidation note + +Seek fade-out (10 frames / 0.2s) is much shorter than skip/stop +fade-out (3s). This is intentional -- seek should feel responsive. +The mechanisms are separate: seek uses frame-counting in +`stream_audio`, skip/stop uses `_fade_and_cancel` in the plugin. + +--- + +## Consolidation Opportunities + +### Volume control unification + +Three volume layers (fade_vol, duck_vol, volume) evaluated in a lambda +per-frame. Works but the priority logic is implicit. A future refactor +could use a single `effective_volume()` method that explicitly resolves +priority and makes the per-frame cost clearer. + +### Fade-out ownership + +Skip/stop/pause all route through `_fade_and_cancel()` -- good. 
But the +fade target is communicated indirectly via `ps["fade_vol"] = 0` and +`ps["fade_step"]`, read by a lambda in the play loop, evaluated in +`stream_audio`. A more explicit signal (e.g. an asyncio.Event or a +dedicated fade state machine in `stream_audio`) could simplify reasoning +about timing. + +### Buffer drain timing + +The 150ms post-fade drain is empirical. A more robust approach would be +to query `sound_output.get_buffer_size()` and wait for it to drop below +a threshold before cancelling. This would adapt to varying network +conditions and pymumble buffer sizes. + +### Track duration + +Duration is probed via `ffprobe` after download (blocking, run in +executor). For kept tracks, it's stored in state metadata. This is +duplicated -- kept track metadata already has duration from +`_fetch_metadata` (yt-dlp). The `ffprobe` path is the fallback for +non-kept tracks. Could unify by always probing locally. + +### Periodic resume save interval + +Currently 10s fixed. Could be adaptive -- save more frequently near +the start of a track (where losing position is more noticeable) and +less frequently later. Marginal benefit vs. complexity though. diff --git a/docs/CHEATSHEET.md b/docs/CHEATSHEET.md index d08f399..1201e0a 100644 --- a/docs/CHEATSHEET.md +++ b/docs/CHEATSHEET.md @@ -69,6 +69,19 @@ Code, plugins, config, and data are bind-mounted. No rebuild needed for code changes -- restart the container or use `!reload` for plugins. Rebuild only when `requirements.txt` or `Containerfile` change. +## Profiling + +```bash +tools/profile # Top 30 by cumulative time +tools/profile -s tottime -n 20 # Top 20 by total time +tools/profile -f mumble # Filter to mumble functions +tools/profile -c -f stream_audio # Who calls stream_audio +tools/profile data/old.prof # Analyze a specific file +``` + +Sort keys: `cumtime`, `tottime`, `calls`, `name`. +Profile data written on graceful shutdown when bot runs with `--cprofile`. 
+ ## Bot Commands ``` @@ -569,6 +582,7 @@ HTML stripped on receive, escaped on send. IRC-only commands are no-ops. !keep # Keep current file + save metadata !kept # List kept files with metadata !kept clear # Delete all kept files + metadata +!kept repair # Re-download missing kept files !duck # Show ducking status !duck on # Enable voice ducking !duck off # Disable voice ducking diff --git a/docs/USAGE.md b/docs/USAGE.md index 62f291b..252c4ad 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -1628,7 +1628,7 @@ and voice transmission. !np Now playing !volume [0-100] Get/set volume (persisted across restarts) !keep Keep current track's audio file (with metadata) -!kept [clear] List kept files with metadata, or clear all +!kept [clear|repair] List kept files, clear all, or re-download missing !testtone Play 3-second 440Hz test tone ``` @@ -1751,6 +1751,8 @@ file (natural dedup). - Use `!kept` to list preserved files with metadata (title, artist, duration, file size) - Use `!kept clear` to delete all preserved files and their metadata +- Use `!kept repair` to re-download any kept tracks whose local files are + missing (e.g. after a cleanup or volume mount issue) - On cancel/error, files are not deleted (needed for `!resume`) ### Extra Mumble Bots diff --git a/plugins/alert.py b/plugins/alert.py index e20a6ff..005ac62 100644 --- a/plugins/alert.py +++ b/plugins/alert.py @@ -368,45 +368,56 @@ def _fetch_og_batch(urls: list[str]) -> dict[str, tuple[str, str, str]]: # -- YouTube InnerTube search (blocking) ------------------------------------ def _extract_videos(obj: object, depth: int = 0) -> list[dict]: - """Recursively walk YouTube JSON to find video results. + """Walk YouTube JSON to find video results (iterative). Finds all objects containing both 'videoId' and 'title' keys. Resilient to YouTube rearranging wrapper layers. + Uses an explicit stack instead of recursion to avoid 50K+ call + overhead on deeply nested InnerTube responses. 
""" - if depth > 20: - return [] - results = [] - if isinstance(obj, dict): - video_id = obj.get("videoId") - title_obj = obj.get("title") - if isinstance(video_id, str) and video_id and title_obj is not None: - if isinstance(title_obj, dict): - runs = title_obj.get("runs", []) - title = "".join(r.get("text", "") for r in runs if isinstance(r, dict)) - elif isinstance(title_obj, str): - title = title_obj - else: - title = "" - if title: - # Extract relative publish time (e.g. "2 days ago") - pub_obj = obj.get("publishedTimeText") - date = "" - if isinstance(pub_obj, dict): - date = pub_obj.get("simpleText", "") - elif isinstance(pub_obj, str): - date = pub_obj - results.append({ - "id": video_id, - "title": title, - "url": f"https://www.youtube.com/watch?v={video_id}", - "date": date, - "extra": "", - }) - for val in obj.values(): - results.extend(_extract_videos(val, depth + 1)) - elif isinstance(obj, list): - for item in obj: - results.extend(_extract_videos(item, depth + 1)) + _MAX_DEPTH = 20 + results: list[dict] = [] + # Stack of (node, depth) tuples + stack: list[tuple[object, int]] = [(obj, 0)] + while stack: + node, d = stack.pop() + if d > _MAX_DEPTH: + continue + if isinstance(node, dict): + video_id = node.get("videoId") + title_obj = node.get("title") + if isinstance(video_id, str) and video_id and title_obj is not None: + if isinstance(title_obj, dict): + runs = title_obj.get("runs", []) + title = "".join( + r.get("text", "") for r in runs if isinstance(r, dict) + ) + elif isinstance(title_obj, str): + title = title_obj + else: + title = "" + if title: + pub_obj = node.get("publishedTimeText") + date = "" + if isinstance(pub_obj, dict): + date = pub_obj.get("simpleText", "") + elif isinstance(pub_obj, str): + date = pub_obj + results.append({ + "id": video_id, + "title": title, + "url": f"https://www.youtube.com/watch?v={video_id}", + "date": date, + "extra": "", + }) + # Reverse to preserve original traversal order (stack is LIFO) + children = [v for 
v in node.values() if isinstance(v, (dict, list))] + for val in reversed(children): + stack.append((val, d + 1)) + elif isinstance(node, list): + for item in reversed(node): + if isinstance(item, (dict, list)): + stack.append((item, d + 1)) return results diff --git a/plugins/lastfm.py b/plugins/lastfm.py new file mode 100644 index 0000000..28b37d4 --- /dev/null +++ b/plugins/lastfm.py @@ -0,0 +1,272 @@ +"""Plugin: music discovery via Last.fm API.""" + +from __future__ import annotations + +import json +import logging +import os +import random +from urllib.parse import urlencode + +from derp.plugin import command + +log = logging.getLogger(__name__) + +_BASE = "https://ws.audioscrobbler.com/2.0/" + + +# -- Config ------------------------------------------------------------------ + + +def _get_api_key(bot) -> str: + """Resolve Last.fm API key from env or config.""" + return (os.environ.get("LASTFM_API_KEY", "") + or bot.config.get("lastfm", {}).get("api_key", "")) + + +# -- API helpers ------------------------------------------------------------- + + +def _api_call(api_key: str, method: str, **params) -> dict: + """Blocking Last.fm API call. 
Run in executor.""" + from derp.http import urlopen + + qs = urlencode({ + "method": method, + "api_key": api_key, + "format": "json", + **params, + }) + url = f"{_BASE}?{qs}" + try: + resp = urlopen(url, timeout=10) + return json.loads(resp.read().decode()) + except Exception: + log.exception("lastfm: API call failed: %s", method) + return {} + + +def _get_similar_artists(api_key: str, artist: str, + limit: int = 10) -> list[dict]: + """Fetch similar artists for a given artist name.""" + data = _api_call(api_key, "artist.getSimilar", + artist=artist, limit=str(limit)) + artists = data.get("similarartists", {}).get("artist", []) + if isinstance(artists, dict): + artists = [artists] + return artists + + +def _get_top_tags(api_key: str, artist: str) -> list[dict]: + """Fetch top tags for an artist.""" + data = _api_call(api_key, "artist.getTopTags", artist=artist) + tags = data.get("toptags", {}).get("tag", []) + if isinstance(tags, dict): + tags = [tags] + return tags + + +def _get_similar_tracks(api_key: str, artist: str, track: str, + limit: int = 10) -> list[dict]: + """Fetch similar tracks for a given artist + track.""" + data = _api_call(api_key, "track.getSimilar", + artist=artist, track=track, limit=str(limit)) + tracks = data.get("similartracks", {}).get("track", []) + if isinstance(tracks, dict): + tracks = [tracks] + return tracks + + +def _search_track(api_key: str, query: str, + limit: int = 5) -> list[dict]: + """Search Last.fm for tracks matching a query.""" + data = _api_call(api_key, "track.search", + track=query, limit=str(limit)) + results = data.get("results", {}).get("trackmatches", {}).get("track", []) + if isinstance(results, dict): + results = [results] + return results + + +# -- Metadata extraction ----------------------------------------------------- + + +def _current_meta(bot) -> tuple[str, str]: + """Extract artist and title from the currently playing track. + + Returns (artist, title). Either or both may be empty. 
+ Tries the music plugin's current track metadata, falling back to + splitting the title on common separators. + """ + music_ps = bot._pstate.get("music", {}) + current = music_ps.get("current") + if current is None: + return ("", "") + raw_title = current.title or "" + + # Try common "Artist - Title" patterns + for sep in (" - ", " -- ", " | ", " ~ "): + if sep in raw_title: + parts = raw_title.split(sep, 1) + return (parts[0].strip(), parts[1].strip()) + + # No separator -- treat whole thing as a search query + return ("", raw_title) + + +# -- Formatting -------------------------------------------------------------- + + +def _fmt_match(m: float | str) -> str: + """Format a Last.fm match score as a percentage.""" + try: + return f"{float(m) * 100:.0f}%" + except (ValueError, TypeError): + return "" + + +# -- Commands ---------------------------------------------------------------- + + +@command("similar", help="Music: !similar [artist|play] -- find similar music") +async def cmd_similar(bot, message): + """Find similar artists or tracks. 
+ + Usage: + !similar Similar to currently playing track + !similar Similar artists to named artist + !similar play Queue a random similar track + !similar play Queue a similar track for named artist + """ + api_key = _get_api_key(bot) + if not api_key: + await bot.reply(message, "Last.fm API key not configured") + return + + parts = message.text.split(None, 2) + # !similar play [artist] + play_mode = len(parts) >= 2 and parts[1].lower() == "play" + if play_mode: + query = parts[2].strip() if len(parts) > 2 else "" + else: + query = parts[1].strip() if len(parts) > 1 else "" + + import asyncio + loop = asyncio.get_running_loop() + + # Resolve artist from query or current track + if query: + artist = query + title = "" + else: + artist, title = _current_meta(bot) + if not artist and not title: + await bot.reply(message, "Nothing playing and no artist given") + return + + # Try track-level similarity first if we have both artist + title + similar = [] + if artist and title: + similar = await loop.run_in_executor( + None, _get_similar_tracks, api_key, artist, title, + ) + + # Fall back to artist-level similarity + if not similar: + search_artist = artist or title + similar_artists = await loop.run_in_executor( + None, _get_similar_artists, api_key, search_artist, + ) + if not similar_artists: + await bot.reply(message, f"No similar artists found for '{search_artist}'") + return + + if play_mode: + # Pick a random similar artist and search YouTube + pick = random.choice(similar_artists[:10]) + pick_name = pick.get("name", "") + if not pick_name: + await bot.reply(message, "No playable result found") + return + # Inject a !play command with a YouTube search + message.text = f"!play {pick_name}" + music_mod = bot.registry._modules.get("music") + if music_mod: + await music_mod.cmd_play(bot, message) + return + + # Display similar artists + lines = [f"Similar to {search_artist}:"] + for a in similar_artists[:8]: + name = a.get("name", "?") + match = 
_fmt_match(a.get("match", "")) + suffix = f" ({match})" if match else "" + lines.append(f" {name}{suffix}") + await bot.long_reply(message, lines, label="similar artists") + return + + # Track-level results + if play_mode: + pick = random.choice(similar[:10]) + pick_artist = pick.get("artist", {}).get("name", "") + pick_title = pick.get("name", "") + search = f"{pick_artist} {pick_title}".strip() + if not search: + await bot.reply(message, "No playable result found") + return + message.text = f"!play {search}" + music_mod = bot.registry._modules.get("music") + if music_mod: + await music_mod.cmd_play(bot, message) + return + + # Display similar tracks + lines = [f"Similar to {artist} - {title}:"] + for t in similar[:8]: + t_artist = t.get("artist", {}).get("name", "") + t_name = t.get("name", "?") + match = _fmt_match(t.get("match", "")) + suffix = f" ({match})" if match else "" + lines.append(f" {t_artist} - {t_name}{suffix}") + await bot.long_reply(message, lines, label="similar tracks") + + +@command("tags", help="Music: !tags [artist] -- show genre tags") +async def cmd_tags(bot, message): + """Show genre/style tags for an artist. 
+ + Usage: + !tags Tags for currently playing artist + !tags Tags for named artist + """ + api_key = _get_api_key(bot) + if not api_key: + await bot.reply(message, "Last.fm API key not configured") + return + + parts = message.text.split(None, 1) + query = parts[1].strip() if len(parts) > 1 else "" + + import asyncio + loop = asyncio.get_running_loop() + + if query: + artist = query + else: + artist, title = _current_meta(bot) + artist = artist or title + if not artist: + await bot.reply(message, "Nothing playing and no artist given") + return + + tags = await loop.run_in_executor( + None, _get_top_tags, api_key, artist, + ) + + if not tags: + await bot.reply(message, f"No tags found for '{artist}'") + return + + # Show top tags with counts + tag_names = [t.get("name", "?") for t in tags[:10] if t.get("name")] + await bot.reply(message, f"{artist}: {', '.join(tag_names)}") diff --git a/plugins/music.py b/plugins/music.py index 3a73889..aca3649 100644 --- a/plugins/music.py +++ b/plugins/music.py @@ -21,6 +21,7 @@ log = logging.getLogger(__name__) _MAX_QUEUE = 50 _MAX_TITLE_LEN = 80 +_PLAYLIST_BATCH = 10 # initial tracks resolved before playback starts @dataclass(slots=True) @@ -31,6 +32,7 @@ class _Track: origin: str = "" # original user-provided URL for re-resolution local_path: Path | None = None # set before playback keep: bool = False # True = don't delete after playback + duration: float = 0.0 # total duration in seconds (0 = unknown) # -- Per-bot runtime state --------------------------------------------------- @@ -55,6 +57,9 @@ def _ps(bot): "fade_step": None, "history": [], "autoplay": cfg.get("autoplay", True), + "autoplay_cooldown": cfg.get("autoplay_cooldown", 30), + "announce": cfg.get("announce", False), + "paused": None, "_watcher_task": None, }) @@ -171,27 +176,32 @@ def _clear_resume(bot) -> None: bot.state.delete("music", "resume") -def _resolve_tracks(url: str, max_tracks: int = _MAX_QUEUE) -> list[tuple[str, str]]: +def _resolve_tracks(url: str, 
max_tracks: int = _MAX_QUEUE, + start: int = 1) -> list[tuple[str, str]]: """Resolve URL into (url, title) pairs via yt-dlp. Blocking, run in executor. Handles both single videos and playlists. For playlists, returns up to - ``max_tracks`` individual entries. Falls back to ``[(url, url)]`` on error. + ``max_tracks`` individual entries starting from 1-based index ``start``. + Falls back to ``[(url, url)]`` on error. YouTube URLs with ``&list=`` are passed through intact so yt-dlp can resolve the full playlist. Playlist params are only stripped in ``_save_resume()`` where we need the exact video for resume. """ + end = start + max_tracks - 1 try: result = subprocess.run( [ "yt-dlp", "--flat-playlist", "--print", "url", "--print", "title", "--no-warnings", - f"--playlist-end={max_tracks}", url, + f"--playlist-start={start}", f"--playlist-end={end}", url, ], capture_output=True, text=True, timeout=30, ) lines = result.stdout.strip().splitlines() if len(lines) < 2: + if start > 1: + return [] # no more pages return [(url, url)] tracks = [] for i in range(0, len(lines) - 1, 2): @@ -201,9 +211,22 @@ def _resolve_tracks(url: str, max_tracks: int = _MAX_QUEUE) -> list[tuple[str, s if not track_url or track_url == "NA": track_url = url tracks.append((track_url, track_title or track_url)) - return tracks if tracks else [(url, url)] + return tracks if tracks else ([] if start > 1 else [(url, url)]) except Exception: - return [(url, url)] + return [] if start > 1 else [(url, url)] + + +def _probe_duration(path: str) -> float: + """Get duration in seconds via ffprobe. 
Blocking -- run in executor.""" + try: + result = subprocess.run( + ["ffprobe", "-v", "quiet", "-show_entries", "format=duration", + "-of", "default=noprint_wrappers=1:nokey=1", path], + capture_output=True, text=True, timeout=5, + ) + return float(result.stdout.strip()) + except Exception: + return 0.0 # -- Download helpers -------------------------------------------------------- @@ -299,6 +322,30 @@ def _cleanup_track(track: _Track) -> None: # -- Duck monitor ------------------------------------------------------------ +def _all_users_muted(bot) -> bool: + """True when every non-bot user in the channel is muted or deafened. + + Used to skip the duck silence threshold -- if everyone has muted, + there's no conversation to protect and music can restore immediately. + """ + if not hasattr(bot, "_mumble") or bot._mumble is None: + return False + bots = getattr(bot.registry, "_bots", {}) + try: + found_human = False + for session_id in list(bot._mumble.users): + user = bot._mumble.users[session_id] + name = user["name"] + if name in bots: + continue + found_human = True + if not (user["self_mute"] or user["mute"] or user["self_deaf"]): + return False + return found_human + except Exception: + return False + + async def _duck_monitor(bot) -> None: """Background task: duck volume when voice is detected, restore on silence. 
@@ -319,10 +366,15 @@ async def _duck_monitor(bot) -> None: restore_start = 0.0 continue ts = getattr(bot.registry, "_voice_ts", 0.0) - if ts == 0.0: + tts = getattr(bot.registry, "_tts_active", False) + if ts == 0.0 and not tts and ps["duck_vol"] is None: continue - silence = time.monotonic() - ts - if silence < ps["duck_silence"]: + silence = time.monotonic() - ts if ts else float("inf") + should_duck = silence < ps["duck_silence"] or tts + # Override: all users muted -- no conversation to protect + if should_duck and not tts and _all_users_muted(bot): + should_duck = False + if should_duck: # Voice active -- duck immediately if ps["duck_vol"] is None: log.info("duck: voice detected, ducking to %d%%", @@ -387,6 +439,8 @@ async def _auto_resume(bot) -> None: break if time.monotonic() - ts >= silence_needed: break + if _all_users_muted(bot): + break else: log.info("music: auto-resume aborted, channel not silent after 60s") await bot.send("0", f"Resume of '{title}' aborted -- " @@ -438,12 +492,13 @@ def _load_kept_tracks(bot) -> list[_Track]: requester="autoplay", local_path=fpath, keep=True, + duration=float(meta.get("duration", 0)), )) return tracks async def _autoplay_kept(bot) -> None: - """Shuffle kept tracks and start playback when idle after reconnect.""" + """Start autoplay loop -- the play loop handles silence-wait + random pick.""" ps = _ps(bot) if ps["current"] is not None: return @@ -455,31 +510,10 @@ async def _autoplay_kept(bot) -> None: # Let pymumble fully stabilize await asyncio.sleep(10) - # Wait for silence - deadline = time.monotonic() + 60 - silence_needed = ps.get("duck_silence", 15) - ts = getattr(bot.registry, "_voice_ts", 0.0) - if ts != 0.0 and time.monotonic() - ts < silence_needed: - await bot.send("0", - f"Shuffling {len(kept)} kept tracks once silent") - - while time.monotonic() < deadline: - await asyncio.sleep(2) - ts = getattr(bot.registry, "_voice_ts", 0.0) - if ts == 0.0: - break - if time.monotonic() - ts >= silence_needed: - 
break - else: - log.info("music: autoplay aborted, channel not silent after 60s") - return - if ps["current"] is not None: return - random.shuffle(kept) - ps["queue"].extend(kept) - log.info("music: autoplay %d kept tracks (shuffled)", len(kept)) + log.info("music: autoplay starting (%d kept tracks available)", len(kept)) _ensure_loop(bot) @@ -526,12 +560,43 @@ async def _play_loop(bot, *, seek: float = 0.0, fade_in: float | bool = True) -> first = True seek_req = [None] ps["seek_req"] = seek_req + _autoplay_pool: list[_Track] = [] # shuffled deck, refilled each cycle try: - while ps["queue"]: + while ps["queue"] or ps.get("autoplay"): + # Autoplay: cooldown + silence wait, then pick next from shuffled deck + if not ps["queue"]: + if not _autoplay_pool: + kept = _load_kept_tracks(bot) + if not kept: + break + random.shuffle(kept) + _autoplay_pool = kept + log.info("music: autoplay shuffled %d kept tracks", len(kept)) + cooldown = ps.get("autoplay_cooldown", 30) + log.info("music: autoplay cooldown %ds before next track", + cooldown) + await asyncio.sleep(cooldown) + # After cooldown, also wait for voice silence + silence_needed = ps.get("duck_silence", 15) + while True: + await asyncio.sleep(2) + ts = getattr(bot.registry, "_voice_ts", 0.0) + if ts == 0.0 or time.monotonic() - ts >= silence_needed: + break + if _all_users_muted(bot): + break + # Re-check: someone may have queued something or stopped + if ps["queue"]: + continue + pick = _autoplay_pool.pop(0) + ps["queue"].append(pick) + log.info("music: autoplay picked '%s' (%d remaining)", + pick.title, len(_autoplay_pool)) track = ps["queue"].pop(0) ps["current"] = track ps["fade_vol"] = None ps["fade_step"] = None + seek_req[0] = None # clear stale seek from previous track done = asyncio.Event() ps["done_event"] = done @@ -561,6 +626,30 @@ async def _play_loop(bot, *, seek: float = 0.0, fade_in: float | bool = True) -> else: source = str(track.local_path) + # Probe duration if unknown + if track.duration <= 0 
and track.local_path: + loop = asyncio.get_running_loop() + track.duration = await loop.run_in_executor( + None, _probe_duration, str(track.local_path), + ) + + # Announce track + if ps.get("announce"): + dur = f" ({_fmt_time(track.duration)})" if track.duration > 0 else "" + await bot.send("0", f"Playing: {_truncate(track.title)}{dur}") + + # Periodic resume-state saver (survives hard kills) + async def _periodic_save(): + try: + while True: + await asyncio.sleep(10) + el = cur_seek + progress[0] * 0.02 + if el > 1.0: + _save_resume(bot, track, el) + except asyncio.CancelledError: + pass + + save_task = bot._spawn(_periodic_save(), name="music-save") try: await bot.stream_audio( source, @@ -589,6 +678,8 @@ async def _play_loop(bot, *, seek: float = 0.0, fade_in: float | bool = True) -> if elapsed > 1.0: _save_resume(bot, track, elapsed) break + finally: + save_task.cancel() await done.wait() if progress[0] > 0: @@ -604,8 +695,9 @@ async def _play_loop(bot, *, seek: float = 0.0, fade_in: float | bool = True) -> pass finally: # Clean up current track's cached file (skipped/stopped tracks) + # but not when pausing -- the track is preserved for unpause current = ps.get("current") - if current: + if current and ps.get("paused") is None: _cleanup_track(current) if duck_task and not duck_task.done(): duck_task.cancel() @@ -654,6 +746,9 @@ async def _fade_and_cancel(bot, duration: float = 3.0) -> None: log.debug("music: fading out (vol=%.2f, step=%.5f, duration=%.1fs)", cur_vol, step, duration) await asyncio.sleep(duration) + # Hold at zero briefly so the ramp fully settles and pymumble + # drains its output buffer -- prevents audible click on cancel. 
+ await asyncio.sleep(0.15) ps["fade_step"] = None if not task.done(): task.cancel() @@ -663,6 +758,36 @@ async def _fade_and_cancel(bot, duration: float = 3.0) -> None: pass +# -- Lazy playlist resolution ------------------------------------------------ + + +async def _playlist_feeder(bot, url: str, start: int, cap: int, + shuffle: bool, requester: str, + origin: str) -> None: + """Background: resolve remaining playlist tracks and append to queue.""" + ps = _ps(bot) + loop = asyncio.get_running_loop() + try: + remaining = await loop.run_in_executor( + None, _resolve_tracks, url, cap, start, + ) + if not remaining: + return + if shuffle: + random.shuffle(remaining) + added = 0 + for track_url, title in remaining: + if len(ps["queue"]) >= _MAX_QUEUE: + break + ps["queue"].append(_Track(url=track_url, title=title, + requester=requester, origin=origin)) + added += 1 + tag = " (shuffled)" if shuffle else "" + log.info("music: background-resolved %d more tracks%s", added, tag) + except Exception: + log.warning("music: background playlist resolution failed") + + # -- Commands ---------------------------------------------------------------- @@ -719,6 +844,12 @@ async def cmd_play(bot, message): _ensure_loop(bot) return + # Strip #random fragment before URL classification / resolution + shuffle = False + if _is_url(url) and url.endswith("#random"): + shuffle = True + url = url[:-7] # strip "#random" + is_search = not _is_url(url) if is_search: url = f"ytsearch10:{url}" @@ -728,26 +859,43 @@ async def cmd_play(bot, message): return remaining = _MAX_QUEUE - len(ps["queue"]) + is_playlist = not is_search and ("list=" in url or "playlist" in url) + batch = min(_PLAYLIST_BATCH, remaining) if is_playlist else remaining + + if shuffle: + await bot.reply(message, "Resolving playlist...") + loop = asyncio.get_running_loop() - resolved = await loop.run_in_executor(None, _resolve_tracks, url, remaining) + resolved = await loop.run_in_executor(None, _resolve_tracks, url, batch) # 
Search: pick one random result instead of enqueuing all if is_search and len(resolved) > 1: resolved = [random.choice(resolved)] + if shuffle and len(resolved) > 1: + random.shuffle(resolved) + was_idle = ps["current"] is None requester = message.nick or "?" - added = 0 # Only set origin for direct URLs (not searches) so resume uses the # resolved video URL rather than an ephemeral search query origin = url if not is_search else "" + added = 0 for track_url, track_title in resolved[:remaining]: ps["queue"].append(_Track(url=track_url, title=track_title, requester=requester, origin=origin)) added += 1 - total_resolved = len(resolved) + # Background-resolve remaining playlist tracks + has_more = is_playlist and len(resolved) >= batch and added < remaining + if has_more and hasattr(bot, "_spawn"): + bot._spawn( + _playlist_feeder(bot, url, batch + 1, remaining - added, + shuffle, requester, origin), + name="music-playlist-feeder", + ) + shuffled = " (shuffled)" if shuffle and added > 1 else "" if added == 1: title = _truncate(resolved[0][1]) if was_idle: @@ -755,13 +903,18 @@ async def cmd_play(bot, message): else: pos = len(ps["queue"]) await bot.reply(message, f"Queued #{pos}: {title}") - elif added < total_resolved: + elif has_more: await bot.reply( message, - f"Queued {added} of {total_resolved} tracks (queue full)", + f"Queued {added} tracks{shuffled}, resolving more...", + ) + elif added < len(resolved): + await bot.reply( + message, + f"Queued {added} of {len(resolved)} tracks{shuffled} (queue full)", ) else: - await bot.reply(message, f"Queued {added} tracks") + await bot.reply(message, f"Queued {added} tracks{shuffled}") if was_idle: _ensure_loop(bot) @@ -775,6 +928,7 @@ async def cmd_stop(bot, message): ps = _ps(bot) ps["queue"].clear() + ps["paused"] = None task = ps.get("task") if task and not task.done(): @@ -793,6 +947,75 @@ async def cmd_stop(bot, message): await bot.reply(message, "Stopped") +_PAUSE_STALE = 45 # seconds before cached stream URLs are 
considered expired +_PAUSE_REWIND = 3 # seconds to rewind on unpause for continuity + + +@command("pause", help="Music: !pause -- toggle pause/unpause") +async def cmd_pause(bot, message): + """Pause or unpause playback. + + Pausing saves the current position and stops streaming. Unpausing + resumes from where it left off. If paused longer than 45 seconds, + non-local tracks are re-downloaded (stream URLs expire). + """ + if not _is_mumble(bot): + return + + ps = _ps(bot) + + # -- Unpause --------------------------------------------------------- + if ps["paused"] is not None: + data = ps["paused"] + ps["paused"] = None + track = data["track"] + elapsed = data["elapsed"] + pause_dur = time.monotonic() - data["paused_at"] + + # Stale stream: discard cached file so play loop re-downloads + if pause_dur > _PAUSE_STALE and track.local_path is not None: + cache = _CACHE_DIR / track.local_path.name + if track.local_path == cache or ( + track.local_path.parent == _CACHE_DIR + ): + track.local_path.unlink(missing_ok=True) + track.local_path = None + log.info("music: pause stale (%.0fs), will re-download", pause_dur) + + # Rewind only if paused long enough to warrant it (anti-flood) + rewind = _PAUSE_REWIND if pause_dur >= _PAUSE_REWIND else 0.0 + seek_pos = max(0.0, elapsed - rewind) + ps["queue"].insert(0, track) + await bot.reply( + message, + f"Unpaused: {_truncate(track.title)} at {_fmt_time(seek_pos)}", + ) + _ensure_loop(bot, seek=seek_pos, fade_in=True) + return + + # -- Pause ----------------------------------------------------------- + if ps["current"] is None: + await bot.reply(message, "Nothing playing") + return + + track = ps["current"] + progress = ps.get("progress") + cur_seek = ps.get("cur_seek", 0.0) + elapsed = cur_seek + (progress[0] * 0.02 if progress else 0.0) + + ps["paused"] = { + "track": track, + "elapsed": elapsed, + "paused_at": time.monotonic(), + } + + await _fade_and_cancel(bot) + await bot.reply( + message, + f"Paused: {_truncate(track.title)} 
at {_fmt_time(elapsed)}", + ) + + @command("resume", help="Music: !resume -- resume last stopped track") async def cmd_resume(bot, message): """Resume playback from the last interrupted position. @@ -925,6 +1148,11 @@ async def cmd_seek(bot, message): target = max(0.0, target) + # Clamp to track duration (leave 1s margin so ffmpeg produces output) + track = ps.get("current") + if track and track.duration > 0 and target >= track.duration: + target = max(0.0, track.duration - 1.0) + seek_req = ps.get("seek_req") if not seek_req: await bot.reply(message, "Nothing playing") @@ -988,10 +1216,13 @@ async def cmd_np(bot, message): progress = ps.get("progress") cur_seek = ps.get("cur_seek", 0.0) elapsed = cur_seek + (progress[0] * 0.02 if progress else 0.0) + pos = _fmt_time(elapsed) + if track.duration > 0: + pos = f"{pos}/{_fmt_time(track.duration)}" await bot.reply( message, f"Now playing: {_truncate(track.title)} [{track.requester}]" - f" ({_fmt_time(elapsed)})", + f" ({pos})", ) @@ -1134,6 +1365,29 @@ async def cmd_duck(bot, message): ) +@command("announce", help="Music: !announce [on|off] -- toggle track announcements") +async def cmd_announce(bot, message): + """Toggle automatic track announcements in the channel.""" + if not _is_mumble(bot): + return + + ps = _ps(bot) + parts = message.text.split() + if len(parts) >= 2: + sub = parts[1].lower() + if sub == "on": + ps["announce"] = True + elif sub == "off": + ps["announce"] = False + else: + await bot.reply(message, "Usage: !announce [on|off]") + return + else: + ps["announce"] = not ps["announce"] + state = "on" if ps["announce"] else "off" + await bot.reply(message, f"Track announcements: {state}") + + @command("keep", help="Music: !keep -- keep current track's audio file") async def cmd_keep(bot, message): """Mark the current track's local file to keep after playback. 
@@ -1209,19 +1463,23 @@ async def cmd_keep(bot, message): await bot.reply(message, f"Keeping #{keep_id}: {label}") -@command("kept", help="Music: !kept [clear] -- list or clear kept files") +@command("kept", help="Music: !kept [clear|repair] -- list, clear, or repair kept files") async def cmd_kept(bot, message): - """List or clear kept audio files in data/music/. + """List, clear, or repair kept audio files in data/music/. - When metadata is available (from ``!keep``), displays title, artist, - duration, and file size. Falls back to filename + size otherwise. + Usage: + !kept List kept tracks with metadata and file status + !kept clear Delete all kept files and metadata + !kept repair Re-download kept tracks whose local files are missing """ if not _is_mumble(bot): await bot.reply(message, "Mumble-only feature") return parts = message.text.split() - if len(parts) >= 2 and parts[1].lower() == "clear": + sub = parts[1].lower() if len(parts) >= 2 else "" + + if sub == "clear": count = 0 if _MUSIC_DIR.is_dir(): for f in _MUSIC_DIR.iterdir(): @@ -1235,6 +1493,10 @@ async def cmd_kept(bot, message): await bot.reply(message, f"Deleted {count} file(s)") return + if sub == "repair": + await _kept_repair(bot, message) + return + # Collect kept entries from state entries = [] for key in bot.state.keys("music"): @@ -1266,15 +1528,86 @@ async def cmd_kept(bot, message): label += f" -- {artist}" if dur > 0: label += f" ({_fmt_time(dur)})" - # Show file size if file exists + # Show file size if file exists, or mark missing fpath = _MUSIC_DIR / filename if filename else None size = "" if fpath and fpath.is_file(): size = f" [{fpath.stat().st_size / (1024 * 1024):.1f}MB]" + else: + size = " [MISSING]" lines.append(f" #{kid} {label}{size}") await bot.long_reply(message, lines, label="kept tracks") +async def _kept_repair(bot, message) -> None: + """Re-download kept tracks whose local files are missing.""" + entries = [] + for key in bot.state.keys("music"): + if not 
key.startswith("keep:"): + continue + raw = bot.state.get("music", key) + if not raw: + continue + try: + meta = json.loads(raw) + except (json.JSONDecodeError, TypeError): + continue + filename = meta.get("filename", "") + if not filename: + continue + fpath = _MUSIC_DIR / filename + if not fpath.is_file(): + entries.append((key, meta)) + + if not entries: + await bot.reply(message, "All kept files present, nothing to repair") + return + + await bot.reply(message, f"Repairing {len(entries)} missing file(s)...") + _MUSIC_DIR.mkdir(parents=True, exist_ok=True) + loop = asyncio.get_running_loop() + repaired = 0 + failed = 0 + + for key, meta in entries: + kid = meta.get("id", "?") + url = meta.get("url", "") + title = meta.get("title", "") + filename = meta["filename"] + if not url: + log.warning("music: repair #%s has no URL, skipping", kid) + failed += 1 + continue + + tid = hashlib.md5(url.encode()).hexdigest()[:12] + dl_path = await loop.run_in_executor( + None, _download_track, url, tid, title, + ) + if not dl_path: + log.warning("music: repair #%s download failed", kid) + failed += 1 + continue + + # Move to kept directory with expected filename + dest = _MUSIC_DIR / filename + if dl_path != dest: + # Extension may differ; update metadata if needed + if dl_path.suffix != dest.suffix: + new_filename = dest.stem + dl_path.suffix + dest = _MUSIC_DIR / new_filename + meta["filename"] = new_filename + bot.state.set("music", key, json.dumps(meta)) + shutil.move(str(dl_path), str(dest)) + + repaired += 1 + log.info("music: repaired #%s -> %s", kid, dest.name) + + msg = f"Repair complete: {repaired} restored" + if failed: + msg += f", {failed} failed" + await bot.reply(message, msg) + + # -- Plugin lifecycle -------------------------------------------------------- diff --git a/plugins/searx.py b/plugins/searx.py index 600fe03..31d1cbe 100644 --- a/plugins/searx.py +++ b/plugins/searx.py @@ -6,6 +6,7 @@ import json import urllib.parse import urllib.request +from 
derp.http import urlopen as _urlopen from derp.plugin import command # -- Constants --------------------------------------------------------------- @@ -38,7 +39,7 @@ def _search(query: str) -> list[dict]: url = f"{_SEARX_URL}?{params}" req = urllib.request.Request(url, method="GET") - resp = urllib.request.urlopen(req, timeout=_FETCH_TIMEOUT) + resp = _urlopen(req, timeout=_FETCH_TIMEOUT, proxy=False) raw = resp.read() resp.close() diff --git a/plugins/voice.py b/plugins/voice.py index c93a569..084f7b5 100644 --- a/plugins/voice.py +++ b/plugins/voice.py @@ -38,6 +38,18 @@ _MAX_SAY_LEN = 500 # max characters for !say _WHISPER_URL = "http://192.168.129.9:8080/inference" _PIPER_URL = "http://192.168.129.9:5100/" + +def _find_voice_peer(bot): + """Find the voice-capable peer (the bot with 'voice' in only_plugins).""" + bots = getattr(bot.registry, "_bots", {}) + for name, b in bots.items(): + if name == bot._username: + continue + if getattr(b, "_only_plugins", None) and "voice" in b._only_plugins: + return b + return None + + # -- Per-bot state ----------------------------------------------------------- @@ -172,8 +184,10 @@ async def _flush_monitor(bot): remainder = text[len(trigger):].strip() if remainder: log.info("voice: trigger from %s: %s", name, remainder) - bot._spawn( - _tts_play(bot, remainder), name="voice-tts", + # Route TTS through voice-capable peer if available + speaker = _find_voice_peer(bot) or bot + speaker._spawn( + _tts_play(speaker, remainder), name="voice-tts", ) continue @@ -242,10 +256,13 @@ async def _tts_play(bot, text: str): if wav_path is None: return try: + # Signal music plugin to duck while TTS is playing + bot.registry._tts_active = True done = asyncio.Event() await bot.stream_audio(str(wav_path), volume=1.0, on_done=done) await done.wait() finally: + bot.registry._tts_active = False Path(wav_path).unlink(missing_ok=True) diff --git a/src/derp/cli.py b/src/derp/cli.py index d3ce980..9417e7b 100644 --- a/src/derp/cli.py +++ 
b/src/derp/cli.py @@ -167,13 +167,18 @@ def main(argv: list[str] | None = None) -> int: elif "except_plugins" in extra: merged_mu.pop("only_plugins", None) extra_cfg["mumble"] = merged_mu - # Extra bots inherit [voice] config but not the trigger - if "voice" not in extra: + username = extra.get("username", f"mumble-{len(bots)}") + # Voice config: per-bot [.voice] overrides global [voice] + per_bot_voice = config.get(username, {}).get("voice") + if per_bot_voice: + voice_cfg = dict(config.get("voice", {})) + voice_cfg.update(per_bot_voice) + extra_cfg["voice"] = voice_cfg + elif "voice" not in extra: extra_cfg["voice"] = { k: v for k, v in config.get("voice", {}).items() if k != "trigger" } - username = extra.get("username", f"mumble-{len(bots)}") bot = MumbleBot(username, extra_cfg, registry) bots.append(bot) diff --git a/src/derp/http.py b/src/derp/http.py index 169e4b6..3631488 100644 --- a/src/derp/http.py +++ b/src/derp/http.py @@ -40,8 +40,8 @@ def _get_pool() -> SOCKSProxyManager: if _pool is None: _pool = SOCKSProxyManager( f"socks5h://{_PROXY_ADDR}:{_PROXY_PORT}/", - num_pools=20, - maxsize=4, + num_pools=30, + maxsize=8, retries=_POOL_RETRIES, ) return _pool @@ -85,10 +85,46 @@ class _ProxyHandler(SocksiPyHandler, urllib.request.HTTPSHandler): # -- Public HTTP interface --------------------------------------------------- + +class _PooledResponse: + """Thin wrapper around a preloaded urllib3 response. + + Provides a ``read()`` that behaves like stdlib (returns full data + on first call, empty bytes on subsequent calls), plus ``close()`` + as a no-op. Preloading ensures the underlying connection returns + to the pool immediately. 
+ """ + + __slots__ = ("status", "headers", "reason", "_data", "_pos") + + def __init__(self, resp): + self.status = resp.status + self.headers = resp.headers + self.reason = resp.reason + self._data = resp.data # already fully read (preloaded) + self._pos = 0 + + def read(self, amt=None): + if self._pos >= len(self._data): + return b"" + if amt is None: + chunk = self._data[self._pos:] + self._pos = len(self._data) + else: + chunk = self._data[self._pos:self._pos + amt] + self._pos += len(chunk) + return chunk + + def close(self): + pass + + def urlopen(req, *, timeout=None, context=None, retries=None, proxy=True): """HTTP urlopen with optional SOCKS5 proxy. - Uses connection pooling via urllib3 for proxied requests. + Uses connection pooling via urllib3 for proxied requests. Responses + are preloaded so the SOCKS connection returns to the pool immediately + (avoids opening 500+ fresh connections per session). Falls back to legacy opener for custom SSL context. When ``proxy=False``, uses stdlib ``urllib.request.urlopen`` directly. Retries on transient SSL/connection errors with exponential backoff. @@ -123,17 +159,14 @@ def urlopen(req, *, timeout=None, context=None, retries=None, proxy=True): headers=headers, body=body, timeout=to, - preload_content=False, + preload_content=True, ) if resp.status >= 400: - # Drain body so connection returns to pool, then raise - # urllib.error.HTTPError for backward compatibility. 
- resp.read() raise urllib.error.HTTPError( url, resp.status, resp.reason or "", resp.headers, None, ) - return resp + return _PooledResponse(resp) except urllib.error.HTTPError: raise except _RETRY_ERRORS as exc: diff --git a/src/derp/mumble.py b/src/derp/mumble.py index acb3d6e..516ab7c 100644 --- a/src/derp/mumble.py +++ b/src/derp/mumble.py @@ -166,6 +166,7 @@ class MumbleBot: self._sound_listeners: list = [] self._receive_sound: bool = mu_cfg.get("receive_sound", True) self._self_mute: bool = mu_cfg.get("self_mute", False) + self._self_deaf: bool = mu_cfg.get("self_deaf", False) self._mute_task: asyncio.Task | None = None self._only_plugins: set[str] | None = ( set(mu_cfg["only_plugins"]) if "only_plugins" in mu_cfg else None @@ -232,6 +233,11 @@ class MumbleBot: self._mumble.users.myself.mute() except Exception: log.exception("mumble: failed to self-mute on connect") + if self._self_deaf: + try: + self._mumble.users.myself.deafen() + except Exception: + log.exception("mumble: failed to self-deafen on connect") if self._loop: asyncio.run_coroutine_threadsafe( self._notify_plugins_connected(), self._loop, @@ -317,6 +323,8 @@ class MumbleBot: """Process a text message from pymumble (runs on asyncio loop).""" text = _strip_html(pb_msg.message) actor = pb_msg.actor + log.debug("mumble: [%s] text from actor %s: %s", + self._username, actor, text[:100]) # Look up sender username nick = None @@ -345,6 +353,13 @@ class MumbleBot: is_channel=is_channel, params=[target or "", text], ) + + # Check for direct addressing: "botname: command ..." 
+ addressed = self._parse_addressed(text) + if addressed is not None: + await self._dispatch_addressed(msg, addressed) + return + await self._dispatch_command(msg) # -- Lifecycle ----------------------------------------------------------- @@ -368,6 +383,60 @@ class MumbleBot: self._mumble.stop() self._mumble = None + # -- Direct addressing --------------------------------------------------- + + def _parse_addressed(self, text: str) -> str | None: + """Check if text is addressed to this bot: ``botname: rest``. + + Returns the text after the address prefix, or None. + """ + name = self._username.lower() + lowered = text.lower() + for sep in (":", ",", " "): + prefix = name + sep + if lowered.startswith(prefix): + return text[len(prefix):].strip() + return None + + def _find_voice_peer(self): + """Find the voice-capable bot (the one with 'voice' in only_plugins).""" + bots = getattr(self.registry, "_bots", {}) + for name, bot in bots.items(): + if name == self._username: + continue + if bot._only_plugins and "voice" in bot._only_plugins: + return bot + return None + + async def _dispatch_addressed(self, msg: MumbleMessage, text: str) -> None: + """Handle a message directly addressed to this bot. + + Supports a small set of built-in commands that don't use the + ``!prefix`` convention. Currently: ``say ``. + + TTS playback is routed through the voice-capable peer (e.g. + derp) so audio comes from the music bot's connection. 
+ """ + parts = text.split(None, 1) + if not parts: + return + sub = parts[0].lower() + arg = parts[1] if len(parts) > 1 else "" + + log.info("mumble: [%s] addressed command: %s (arg=%s)", + self._username, sub, arg[:80]) + + if sub == "say" and arg: + voice_mod = self.registry._modules.get("voice") + tts_play = getattr(voice_mod, "_tts_play", None) if voice_mod else None + if tts_play is None: + await self.reply(msg, "Voice not available") + return + # Route audio through the voice-capable peer + speaker = self._find_voice_peer() or self + speaker._spawn(tts_play(speaker, arg), name="addressed-say") + # Extend with elif for future addressed commands + # -- Command dispatch ---------------------------------------------------- async def _dispatch_command(self, msg: MumbleMessage) -> None: @@ -816,12 +885,17 @@ class MumbleBot: pass log.info("stream_audio: finished, %d frames", frames) except asyncio.CancelledError: - try: - if self._is_audio_ready(): - self._mumble.sound_output.clear_buffer() - except Exception: - pass - log.info("stream_audio: cancelled at frame %d", frames) + # Only clear the buffer if volume is still audible -- if a + # fade-out has already driven _cur_vol to ~0 the remaining + # frames are silent and clearing mid-drain causes a click. 
+ if _cur_vol > 0.01: + try: + if self._is_audio_ready(): + self._mumble.sound_output.clear_buffer() + except Exception: + pass + log.info("stream_audio: cancelled at frame %d (vol=%.3f)", + frames, _cur_vol) raise except Exception: log.exception("stream_audio: error at frame %d", frames) diff --git a/tests/test_http.py b/tests/test_http.py index 902060e..e76be30 100644 --- a/tests/test_http.py +++ b/tests/test_http.py @@ -203,11 +203,15 @@ class TestUrlopen: pool = MagicMock() resp = MagicMock() resp.status = 200 + resp.data = b"ok" + resp.reason = "OK" + resp.headers = {} pool.request.return_value = resp mock_pool_fn.return_value = pool result = urlopen("https://example.com/") - assert result is resp + assert result.status == 200 + assert result.read() == b"ok" @patch.object(derp.http, "_get_pool") def test_context_falls_back_to_opener(self, mock_pool_fn): diff --git a/tests/test_music.py b/tests/test_music.py index 825b4f7..48ac34b 100644 --- a/tests/test_music.py +++ b/tests/test_music.py @@ -563,6 +563,48 @@ class TestPlaylistExpansion: assert "list=PLxyz" in called_url assert len(tracks) == 2 + def test_random_fragment_shuffles(self): + """#random fragment shuffles resolved playlist tracks.""" + bot = _FakeBot() + msg = _Msg(text="!play https://example.com/playlist#random") + tracks = [(f"https://example.com/{i}", f"Track {i}") for i in range(20)] + with patch.object(_mod, "_resolve_tracks", return_value=list(tracks)) as mock_rt: + with patch.object(_mod, "_ensure_loop"): + asyncio.run(_mod.cmd_play(bot, msg)) + # Fragment stripped before passing to resolver + called_url = mock_rt.call_args[0][0] + assert "#random" not in called_url + ps = _mod._ps(bot) + assert len(ps["queue"]) == 20 + # Extremely unlikely (1/20!) 
that shuffle preserves exact order + titles = [t.title for t in ps["queue"]] + assert titles != [f"Track {i}" for i in range(20)] or len(titles) == 1 + # Announces shuffle + assert any("shuffled" in r for r in bot.replied) + + def test_random_fragment_single_track_no_error(self): + """#random on a single-video URL works fine (nothing to shuffle).""" + bot = _FakeBot() + msg = _Msg(text="!play https://example.com/video#random") + tracks = [("https://example.com/video", "Solo Track")] + with patch.object(_mod, "_resolve_tracks", return_value=tracks): + with patch.object(_mod, "_ensure_loop"): + asyncio.run(_mod.cmd_play(bot, msg)) + ps = _mod._ps(bot) + assert len(ps["queue"]) == 1 + assert ps["queue"][0].title == "Solo Track" + + def test_random_fragment_ignored_for_search(self): + """#random is not treated specially for search queries.""" + bot = _FakeBot() + msg = _Msg(text="!play jazz #random") + tracks = [("https://example.com/1", "Result")] + with patch.object(_mod, "_resolve_tracks", return_value=tracks) as mock_rt: + with patch.object(_mod, "_ensure_loop"): + asyncio.run(_mod.cmd_play(bot, msg)) + # Search query passed as-is (not a URL, fragment not stripped) + assert mock_rt.call_args[0][0] == "ytsearch10:jazz #random" + def test_resolve_tracks_error_fallback(self): """On error, returns [(url, url)].""" with patch("subprocess.run", side_effect=Exception("fail")): @@ -577,6 +619,136 @@ class TestPlaylistExpansion: tracks = _mod._resolve_tracks("https://example.com/empty") assert tracks == [("https://example.com/empty", "https://example.com/empty")] + def test_resolve_tracks_start_param(self): + """start= passes --playlist-start to yt-dlp.""" + result = MagicMock() + result.stdout = "https://example.com/6\nTrack 6\n" + with patch("subprocess.run", return_value=result) as mock_run: + tracks = _mod._resolve_tracks("https://example.com/pl", + max_tracks=5, start=6) + cmd = mock_run.call_args[0][0] + assert "--playlist-start=6" in cmd + assert "--playlist-end=10" 
in cmd + assert tracks == [("https://example.com/6", "Track 6")] + + def test_resolve_tracks_start_empty_returns_empty(self): + """Paginated call with no results returns [] (not fallback).""" + result = MagicMock() + result.stdout = "" + with patch("subprocess.run", return_value=result): + tracks = _mod._resolve_tracks("https://example.com/pl", + start=100) + assert tracks == [] + + def test_resolve_tracks_start_error_returns_empty(self): + """Paginated call on error returns [] (not fallback).""" + with patch("subprocess.run", side_effect=Exception("fail")): + tracks = _mod._resolve_tracks("https://example.com/pl", + start=10) + assert tracks == [] + + def test_playlist_url_triggers_batched_resolve(self): + """Playlist URL resolves initial batch, spawns feeder for rest.""" + bot = _FakeBot() + batch = _mod._PLAYLIST_BATCH + initial = [(f"https://example.com/{i}", f"T{i}") + for i in range(batch)] + spawned = [] + orig_spawn = bot._spawn + + def spy_spawn(coro, *, name=None): + spawned.append(name) + return orig_spawn(coro, name=name) + + bot._spawn = spy_spawn + msg = _Msg(text="!play https://example.com/watch?v=a&list=PLxyz") + with patch.object(_mod, "_resolve_tracks", return_value=initial): + with patch.object(_mod, "_ensure_loop"): + asyncio.run(_mod.cmd_play(bot, msg)) + ps = _mod._ps(bot) + assert len(ps["queue"]) == batch + assert "music-playlist-feeder" in spawned + assert any("resolving more" in r.lower() for r in bot.replied) + + def test_non_playlist_url_no_feeder(self): + """Single video URL does not spawn background feeder.""" + bot = _FakeBot() + spawned = [] + orig_spawn = bot._spawn + + def spy_spawn(coro, *, name=None): + spawned.append(name) + return orig_spawn(coro, name=name) + + bot._spawn = spy_spawn + tracks = [("https://example.com/v", "Video")] + msg = _Msg(text="!play https://example.com/v") + with patch.object(_mod, "_resolve_tracks", return_value=tracks): + with patch.object(_mod, "_ensure_loop"): + asyncio.run(_mod.cmd_play(bot, msg)) + 
assert "music-playlist-feeder" not in spawned + + def test_playlist_feeder_appends_to_queue(self): + """Background feeder resolves remaining tracks into queue.""" + bot = _FakeBot() + ps = _mod._ps(bot) + remaining = [("https://example.com/6", "Track 6"), + ("https://example.com/7", "Track 7")] + + async def _check(): + with patch.object(_mod, "_resolve_tracks", + return_value=remaining): + await _mod._playlist_feeder( + bot, "https://example.com/pl", 6, 10, + False, "Alice", "https://example.com/pl", + ) + assert len(ps["queue"]) == 2 + assert ps["queue"][0].title == "Track 6" + assert ps["queue"][1].requester == "Alice" + + asyncio.run(_check()) + + def test_playlist_feeder_shuffles(self): + """Background feeder shuffles when shuffle=True.""" + bot = _FakeBot() + ps = _mod._ps(bot) + remaining = [(f"https://example.com/{i}", f"T{i}") + for i in range(20)] + + async def _check(): + with patch.object(_mod, "_resolve_tracks", + return_value=list(remaining)): + await _mod._playlist_feeder( + bot, "https://example.com/pl", 6, 20, + True, "Alice", "", + ) + titles = [t.title for t in ps["queue"]] + assert len(titles) == 20 + # Extremely unlikely shuffle preserves order + assert titles != [f"T{i}" for i in range(20)] + + asyncio.run(_check()) + + def test_playlist_feeder_respects_queue_cap(self): + """Background feeder stops at _MAX_QUEUE.""" + bot = _FakeBot() + ps = _mod._ps(bot) + # Pre-fill queue to near capacity + ps["queue"] = [_mod._Track(url="x", title="t", requester="a") + for _ in range(_mod._MAX_QUEUE - 2)] + remaining = [(f"https://example.com/{i}", f"T{i}") + for i in range(10)] + + async def _check(): + with patch.object(_mod, "_resolve_tracks", + return_value=remaining): + await _mod._playlist_feeder( + bot, "url", 6, 10, False, "a", "", + ) + assert len(ps["queue"]) == _mod._MAX_QUEUE + + asyncio.run(_check()) + # --------------------------------------------------------------------------- # TestResumeState @@ -925,6 +1097,56 @@ class TestDuckMonitor: 
pass asyncio.run(_check()) + def test_tts_active_ducks(self): + """TTS activity from voice peer triggers ducking.""" + bot = _FakeBot() + ps = _mod._ps(bot) + ps["duck_enabled"] = True + ps["duck_floor"] = 5 + ps["duck_restore"] = 1 # fast restore for test + bot.registry._voice_ts = 0.0 + bot.registry._tts_active = True + + async def _check(): + task = asyncio.create_task(_mod._duck_monitor(bot)) + await asyncio.sleep(1.5) + assert ps["duck_vol"] == 5.0 + # TTS ends -- restore should begin and complete quickly + bot.registry._tts_active = False + await asyncio.sleep(2.5) + assert ps["duck_vol"] is None + task.cancel() + try: + await task + except asyncio.CancelledError: + pass + asyncio.run(_check()) + + def test_tts_active_overrides_all_muted(self): + """TTS ducks even when all users are muted.""" + bot = _FakeBot() + ps = _mod._ps(bot) + ps["duck_enabled"] = True + ps["duck_floor"] = 5 + bot.registry._voice_ts = time.monotonic() + bot.registry._tts_active = True + # Simulate all users muted + bot._mumble = MagicMock() + bot._mumble.users = {1: {"name": "human", "self_mute": True, + "mute": False, "self_deaf": False}} + bot.registry._bots = {} + + async def _check(): + task = asyncio.create_task(_mod._duck_monitor(bot)) + await asyncio.sleep(1.5) + assert ps["duck_vol"] == 5.0 + task.cancel() + try: + await task + except asyncio.CancelledError: + pass + asyncio.run(_check()) + # --------------------------------------------------------------------------- # TestAutoResume @@ -1109,33 +1331,21 @@ class TestAutoResume: class TestAutoplayKept: - def test_shuffles_kept_tracks(self, tmp_path): - """Autoplay loads kept tracks, shuffles, and starts playback.""" + def test_starts_loop_with_kept_tracks(self, tmp_path): + """Autoplay starts play loop when kept tracks exist.""" bot = _FakeBot() bot.registry._voice_ts = 0.0 music_dir = tmp_path / "music" music_dir.mkdir() - # Create two kept files (music_dir / "a.opus").write_bytes(b"audio") - (music_dir / 
"b.opus").write_bytes(b"audio") bot.state.set("music", "keep:1", json.dumps({ "url": "https://example.com/a", "title": "Track A", "filename": "a.opus", "id": 1, })) - bot.state.set("music", "keep:2", json.dumps({ - "url": "https://example.com/b", "title": "Track B", - "filename": "b.opus", "id": 2, - })) with patch.object(_mod, "_MUSIC_DIR", music_dir), \ patch.object(_mod, "_ensure_loop") as mock_loop: asyncio.run(_mod._autoplay_kept(bot)) mock_loop.assert_called_once_with(bot) - ps = _mod._ps(bot) - assert len(ps["queue"]) == 2 - titles = {t.title for t in ps["queue"]} - assert titles == {"Track A", "Track B"} - # All tracks marked keep=True - assert all(t.keep for t in ps["queue"]) def test_skips_when_already_playing(self): bot = _FakeBot() @@ -1429,6 +1639,20 @@ class TestKeptCommand: assert not list(music_dir.iterdir()) assert bot.state.get("music", "keep:1") is None + def test_kept_shows_missing_marker(self, tmp_path): + """Tracks with missing files show [MISSING] in listing.""" + bot = _FakeBot() + music_dir = tmp_path / "music" + music_dir.mkdir() + bot.state.set("music", "keep:1", json.dumps({ + "title": "Gone Track", "artist": "", "duration": 0, + "filename": "gone.opus", "id": 1, + })) + with patch.object(_mod, "_MUSIC_DIR", music_dir): + msg = _Msg(text="!kept") + asyncio.run(_mod.cmd_kept(bot, msg)) + assert any("MISSING" in r for r in bot.replied) + def test_kept_non_mumble(self): bot = _FakeBot(mumble=False) msg = _Msg(text="!kept") @@ -1908,3 +2132,102 @@ class TestFetchMetadata: assert meta["title"] == "" assert meta["artist"] == "" assert meta["duration"] == 0 + + +# --------------------------------------------------------------------------- +# TestKeptRepair +# --------------------------------------------------------------------------- + + +class TestKeptRepair: + def test_repair_nothing_missing(self, tmp_path): + """Repair reports all present when files exist.""" + bot = _FakeBot() + music_dir = tmp_path / "music" + music_dir.mkdir() + 
(music_dir / "song.opus").write_bytes(b"audio") + bot.state.set("music", "keep:1", json.dumps({ + "url": "https://example.com/v", "title": "Song", + "filename": "song.opus", "id": 1, + })) + with patch.object(_mod, "_MUSIC_DIR", music_dir): + msg = _Msg(text="!kept repair") + asyncio.run(_mod.cmd_kept(bot, msg)) + assert any("nothing to repair" in r.lower() for r in bot.replied) + + def test_repair_downloads_missing(self, tmp_path): + """Repair re-downloads missing files.""" + bot = _FakeBot() + music_dir = tmp_path / "music" + music_dir.mkdir() + bot.state.set("music", "keep:1", json.dumps({ + "url": "https://example.com/v", "title": "Song", + "filename": "song.opus", "id": 1, + })) + + dl_path = tmp_path / "cache" / "dl.opus" + dl_path.parent.mkdir() + dl_path.write_bytes(b"audio") + + with patch.object(_mod, "_MUSIC_DIR", music_dir), \ + patch.object(_mod, "_download_track", return_value=dl_path): + msg = _Msg(text="!kept repair") + asyncio.run(_mod.cmd_kept(bot, msg)) + assert any("1 restored" in r for r in bot.replied) + assert (music_dir / "song.opus").is_file() + + def test_repair_counts_failures(self, tmp_path): + """Repair reports failed downloads.""" + bot = _FakeBot() + music_dir = tmp_path / "music" + music_dir.mkdir() + bot.state.set("music", "keep:1", json.dumps({ + "url": "https://example.com/v", "title": "Song", + "filename": "song.opus", "id": 1, + })) + + with patch.object(_mod, "_MUSIC_DIR", music_dir), \ + patch.object(_mod, "_download_track", return_value=None): + msg = _Msg(text="!kept repair") + asyncio.run(_mod.cmd_kept(bot, msg)) + assert any("1 failed" in r for r in bot.replied) + + def test_repair_no_url_skips(self, tmp_path): + """Repair skips entries with no URL.""" + bot = _FakeBot() + music_dir = tmp_path / "music" + music_dir.mkdir() + bot.state.set("music", "keep:1", json.dumps({ + "url": "", "title": "No URL", + "filename": "nourl.opus", "id": 1, + })) + + with patch.object(_mod, "_MUSIC_DIR", music_dir): + msg = _Msg(text="!kept 
repair") + asyncio.run(_mod.cmd_kept(bot, msg)) + assert any("1 failed" in r for r in bot.replied) + + def test_repair_extension_mismatch(self, tmp_path): + """Repair updates metadata when download extension differs.""" + bot = _FakeBot() + music_dir = tmp_path / "music" + music_dir.mkdir() + bot.state.set("music", "keep:1", json.dumps({ + "url": "https://example.com/v", "title": "Song", + "filename": "song.opus", "id": 1, + })) + + dl_path = tmp_path / "cache" / "dl.webm" + dl_path.parent.mkdir() + dl_path.write_bytes(b"audio") + + with patch.object(_mod, "_MUSIC_DIR", music_dir), \ + patch.object(_mod, "_download_track", return_value=dl_path): + msg = _Msg(text="!kept repair") + asyncio.run(_mod.cmd_kept(bot, msg)) + assert any("1 restored" in r for r in bot.replied) + # Filename updated to new extension + raw = bot.state.get("music", "keep:1") + stored = json.loads(raw) + assert stored["filename"] == "song.webm" + assert (music_dir / "song.webm").is_file() diff --git a/tools/profile b/tools/profile new file mode 100755 index 0000000..5719fbd --- /dev/null +++ b/tools/profile @@ -0,0 +1,97 @@ +#!/usr/bin/env bash +# Analyze cProfile data from the bot process. 
+# Usage: tools/profile [OPTIONS] [FILE] +# +# Options: +# -n NUM Show top NUM entries (default: 30) +# -s SORT Sort by: cumtime, tottime, calls, name (default: cumtime) +# -f PATTERN Filter to entries matching PATTERN +# -c Callers view (who calls the hot functions) +# -h Show this help +# +# Examples: +# tools/profile # top 30 by cumulative time +# tools/profile -s tottime -n 20 # top 20 by total time +# tools/profile -f mumble # only mumble-related functions +# tools/profile -c -f stream_audio # who calls stream_audio +# tools/profile data/old.prof # analyze a specific file + +# shellcheck source=tools/_common.sh +source "$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/_common.sh" + +DEFAULT_PROF="$PROJECT_DIR/data/derp.prof" +TOP=30 +SORT="cumtime" +PATTERN="" +CALLERS=false + +usage() { + sed -n '2,/^$/s/^# \?//p' "$0" + exit 0 +} + +while getopts ":n:s:f:ch" opt; do + case $opt in + n) TOP="$OPTARG" ;; + s) SORT="$OPTARG" ;; + f) PATTERN="$OPTARG" ;; + c) CALLERS=true ;; + h) usage ;; + :) err "option -$OPTARG requires an argument"; exit 2 ;; + *) err "unknown option -$OPTARG"; exit 2 ;; + esac +done +shift $((OPTIND - 1)) + +PROF="${1:-$DEFAULT_PROF}" + +if [[ ! -f "$PROF" ]]; then + err "profile not found: $PROF" + dim "run the bot with --cprofile and stop it gracefully" + exit 1 +fi + +# Validate sort key +case "$SORT" in + cumtime|tottime|calls|name) ;; + *) err "invalid sort key: $SORT (use cumtime, tottime, calls, name)"; exit 2 ;; +esac + +# Profile metadata +size=$(stat -c %s "$PROF" 2>/dev/null || stat -f %z "$PROF" 2>/dev/null) +human=$(numfmt --to=iec-i --suffix=B "$size" 2>/dev/null || echo "${size}B") +modified=$(stat -c %y "$PROF" 2>/dev/null | cut -d. 
-f1) + +printf '%b%s%b\n' "$BLU" "Profile" "$RST" +dim "$PROF ($human, $modified)" +echo + +# Build pstats script +read -r -d '' PYSCRIPT << 'PYEOF' || true +import pstats +import sys +import io + +prof_path = sys.argv[1] +sort_key = sys.argv[2] +top_n = int(sys.argv[3]) +pattern = sys.argv[4] +callers = sys.argv[5] == "1" + +p = pstats.Stats(prof_path, stream=sys.stdout) +p.strip_dirs() +p.sort_stats(sort_key) + +if pattern: + if callers: + p.print_callers(pattern, top_n) + else: + p.print_stats(pattern, top_n) +else: + if callers: + p.print_callers(top_n) + else: + p.print_stats(top_n) +PYEOF + +exec python3 -c "$PYSCRIPT" "$PROF" "$SORT" "$TOP" "$PATTERN" "$( $CALLERS && echo 1 || echo 0 )"