Files
derp/plugins/pastemoni.py
user 073659607e feat: add multi-server support
Connect to multiple IRC servers concurrently from a single config file.
Plugins are loaded once and shared; per-server state is isolated via
separate SQLite databases and per-bot runtime state (bot._pstate).

- Add build_server_configs() for [servers.*] config layout
- Bot.__init__ gains name parameter, _pstate dict for plugin isolation
- cli.py runs multiple bots via asyncio.gather
- 9 stateful plugins migrated from module-level dicts to _ps(bot) pattern
- Backward compatible: legacy [server] config works unchanged

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-21 19:04:20 +01:00

532 lines
16 KiB
Python

"""Plugin: paste site keyword monitor for Pastebin and GitHub Gists."""
from __future__ import annotations
import asyncio
import json
import logging
import re
import urllib.request
from datetime import datetime, timezone
from html.parser import HTMLParser
from derp.http import urlopen as _urlopen
from derp.plugin import command, event
_log = logging.getLogger(__name__)
# -- Constants ---------------------------------------------------------------
# Monitor names: one lowercase alphanumeric character followed by up to 19
# lowercase alphanumerics or hyphens (1-20 characters in total).
_NAME_RE = re.compile(r"^[a-z0-9][a-z0-9-]{0,19}$")
# Cap on remembered item IDs per backend and monitor; oldest IDs are dropped.
_MAX_SEEN = 200
# Items announced individually per backend in one poll; the rest are
# summarized in a single "... and N more" line.
_MAX_ANNOUNCE = 5
# Default delay between polls, in seconds (passed to asyncio.sleep).
_DEFAULT_INTERVAL = 300
# Upper bound applied to the doubled delay used after repeated poll failures.
_MAX_INTERVAL = 3600
# Timeout handed to every outbound HTTP request
# (presumably seconds -- confirm against derp.http.urlopen).
_FETCH_TIMEOUT = 15
# User-Agent header sent with every Pastebin and GitHub request.
_USER_AGENT = "derp-bot/1.0 (IRC paste monitor)"
# Maximum number of monitors that can be added per channel.
_MAX_MONITORS = 20
# Default window size for snippets cut around a keyword match
# (ellipsis markers are added on top of this).
_MAX_SNIPPET_LEN = 80
# Default maximum length of an announced title, ellipsis included.
_MAX_TITLE_LEN = 60
# -- Per-bot runtime state ---------------------------------------------------
def _ps(bot):
    """Return this plugin's runtime state for *bot*, creating it on first use.

    The state lives under the "pastemoni" key of ``bot._pstate`` and holds
    three dicts: running poller tasks, cached monitor data and consecutive
    error counts.
    """
    registry = bot._pstate
    fresh = {
        "pollers": {},
        "monitors": {},
        "errors": {},
    }
    return registry.setdefault("pastemoni", fresh)
# -- Pure helpers ------------------------------------------------------------
def _state_key(channel: str, name: str) -> str:
    """Join channel and monitor name into the ``channel:name`` state key."""
    return "{}:{}".format(channel, name)
def _validate_name(name: str) -> bool:
    """Return True when *name* matches the monitor-name pattern ``_NAME_RE``."""
    matched = _NAME_RE.match(name)
    return matched is not None
def _truncate(text: str, max_len: int = _MAX_TITLE_LEN) -> str:
    """Shorten *text* to at most *max_len* characters, ending in "..." if cut.

    Text that already fits is returned unchanged. Trailing whitespace left
    by the cut is removed before the ellipsis is appended.
    """
    if len(text) > max_len:
        head = text[: max_len - 3]
        return head.rstrip() + "..."
    return text
def _snippet_around(text: str, keyword: str, max_len: int = _MAX_SNIPPET_LEN) -> str:
    """Cut a window of *text* around the first case-insensitive *keyword* hit.

    Whitespace runs are collapsed to single spaces first. Text that fits in
    *max_len* is returned whole. If the keyword is not found, the head of the
    text is returned with a trailing ellipsis. Otherwise the window starts
    about a third of *max_len* before the match, and "..." marks every side
    on which text was cut off.
    """
    if not text:
        return ""

    flat = " ".join(text.split())  # collapse whitespace
    total = len(flat)
    if total <= max_len:
        return flat

    pos = flat.lower().find(keyword.lower())
    if pos < 0:
        return flat[: max_len - 3] + "..."

    begin = max(0, pos - max_len // 3)
    stop = min(total, begin + max_len)
    lead = "..." if begin > 0 else ""
    tail = "..." if stop < total else ""
    return f"{lead}{flat[begin:stop]}{tail}"
# -- State helpers -----------------------------------------------------------
def _save(bot, key: str, data: dict) -> None:
    """Serialize *data* as JSON and store it under *key* in bot.state."""
    payload = json.dumps(data)
    bot.state.set("pastemoni", key, payload)
def _load(bot, key: str) -> dict | None:
"""Load monitor data from bot.state."""
raw = bot.state.get("pastemoni", key)
if raw is None:
return None
try:
return json.loads(raw)
except json.JSONDecodeError:
return None
def _delete(bot, key: str) -> None:
    """Drop the persisted monitor entry stored under *key*."""
    store = bot.state
    store.delete("pastemoni", key)
# -- Pastebin archive parser ------------------------------------------------
class _ArchiveParser(HTMLParser):
    """Collect ``(paste_id, title)`` pairs from Pastebin archive HTML.

    An anchor counts as a paste link when its href is a slash followed by
    exactly eight alphanumeric characters. The title is the text found
    between the opening and closing anchor tags.
    """

    # Same pattern the href is tested against on every <a> tag.
    _PASTE_HREF = re.compile(r"^/[a-zA-Z0-9]{8}$")

    def __init__(self):
        super().__init__()
        self.links: list[tuple[str, str]] = []  # (paste_id, title)
        self._in_link = False
        self._href = ""
        self._title_parts: list[str] = []

    def handle_starttag(self, tag, attrs):
        """Start capturing a title when an anchor points at a paste."""
        if tag == "a":
            # dict() keeps the last value when an attribute is repeated.
            target = dict(attrs).get("href") or ""
            if self._PASTE_HREF.match(target):
                self._href = target[1:]  # strip leading /
                self._title_parts = []
                self._in_link = True

    def handle_data(self, data):
        """Buffer text that appears inside a paste anchor."""
        if not self._in_link:
            return
        self._title_parts.append(data)

    def handle_endtag(self, tag):
        """Finish the current paste anchor and record it."""
        if tag != "a" or not self._in_link:
            return
        self._in_link = False
        if not self._href:
            return
        label = "".join(self._title_parts).strip()
        self.links.append((self._href, label))
# -- Pastebin backend --------------------------------------------------------
def _fetch_pastebin(keyword: str) -> list[dict]:
    """Scrape the Pastebin archive and return pastes matching *keyword*.

    Blocking: performs one request for the archive page plus one raw-content
    request for each of the first 30 listed pastes whose title does not
    already match. Matching is case-insensitive.

    Args:
        keyword: Text to look for in paste titles and raw content.

    Returns:
        A list of dicts with the keys "id", "title", "url" and "snippet".
        Title matches carry an empty snippet; content matches carry a snippet
        cut around the keyword.

    Raises:
        Whatever the archive request raises. Failures on individual raw
        fetches are logged at debug level and the paste is skipped.
    """
    req = urllib.request.Request("https://pastebin.com/archive", method="GET")
    req.add_header("User-Agent", _USER_AGENT)
    resp = _urlopen(req, timeout=_FETCH_TIMEOUT)
    try:
        raw = resp.read()
    finally:
        # Close even when read() fails so the connection is not leaked.
        resp.close()
    html = raw.decode("utf-8", errors="replace")

    parser = _ArchiveParser()
    parser.feed(html)

    kw_lower = keyword.lower()
    results: list[dict] = []
    for paste_id, title in parser.links[:30]:
        # Check title first (avoids raw fetch)
        if kw_lower in title.lower():
            results.append({
                "id": paste_id,
                "title": _truncate(title, _MAX_TITLE_LEN),
                "url": f"https://pastebin.com/{paste_id}",
                "snippet": "",
            })
            continue

        # Fetch raw content and check
        try:
            raw_req = urllib.request.Request(
                f"https://pastebin.com/raw/{paste_id}", method="GET",
            )
            raw_req.add_header("User-Agent", _USER_AGENT)
            raw_resp = _urlopen(raw_req, timeout=_FETCH_TIMEOUT)
            try:
                content = raw_resp.read().decode("utf-8", errors="replace")
            finally:
                raw_resp.close()
        except Exception as exc:
            # Best effort: one unreadable paste must not abort the scan.
            _log.debug("pastemoni pb: raw fetch failed for %s: %s", paste_id, exc)
            continue

        if kw_lower in content.lower():
            results.append({
                "id": paste_id,
                "title": _truncate(title or "(untitled)", _MAX_TITLE_LEN),
                "url": f"https://pastebin.com/{paste_id}",
                "snippet": _snippet_around(content, keyword),
            })
    return results
# -- GitHub Gists backend ----------------------------------------------------
def _fetch_gists(keyword: str) -> list[dict]:
    """Query GitHub's public gist feed and return gists matching *keyword*.

    Blocking: performs a single API request for 30 public gists. Only the
    description and file names are searched (case-insensitively); gist
    contents are not downloaded.

    Args:
        keyword: Text to look for in gist descriptions and file names.

    Returns:
        A list of dicts with the keys "id", "title", "url" and "snippet".
        The snippet is only filled in when the matched text is longer than
        the title limit. An unexpected (non-list) response yields an empty
        list.

    Raises:
        Whatever the HTTP request or JSON decoding raises.
    """
    req = urllib.request.Request(
        "https://api.github.com/gists/public?per_page=30", method="GET",
    )
    req.add_header("User-Agent", _USER_AGENT)
    req.add_header("Accept", "application/vnd.github+json")
    resp = _urlopen(req, timeout=_FETCH_TIMEOUT)
    try:
        raw = resp.read()
    finally:
        # Close even when read() fails so the connection is not leaked.
        resp.close()

    gists = json.loads(raw)
    if not isinstance(gists, list):
        return []

    kw_lower = keyword.lower()
    results: list[dict] = []
    for gist in gists:
        # Skip malformed entries instead of failing the whole backend.
        if not isinstance(gist, dict):
            continue
        gist_id = gist.get("id", "")
        if not gist_id:
            continue

        description = gist.get("description") or ""
        html_url = gist.get("html_url", "")
        files = gist.get("files") or {}
        if not isinstance(files, dict):
            files = {}
        filenames = " ".join(files)

        searchable = f"{description} {filenames}"
        if kw_lower not in searchable.lower():
            continue

        source = description or filenames
        title = _truncate(source or "(no description)", _MAX_TITLE_LEN)
        snippet = _snippet_around(source, keyword) if len(source) > _MAX_TITLE_LEN else ""
        results.append({
            "id": gist_id,
            "title": title,
            "url": html_url,
            "snippet": snippet,
        })
    return results
# -- Backend registry -------------------------------------------------------
# Maps a short backend tag to its blocking fetch function. Each function
# takes the keyword and returns a list of item dicts. The tag is shown as a
# "[tag]" prefix in announcements and is the key under which a monitor's
# "seen" IDs and "last_errors" are stored.
_BACKENDS: dict[str, callable] = {
    "pb": _fetch_pastebin,
    "gh": _fetch_gists,
}
# -- Polling -----------------------------------------------------------------
async def _poll_once(bot, key: str, announce: bool = True) -> None:
    """Run a single poll cycle for one monitor across all backends.

    Every backend in ``_BACKENDS`` is called in the default executor with the
    monitor's keyword. Items whose ID is not yet in the monitor's per-backend
    "seen" list are recorded and, when *announce* is true, sent to the
    monitor's channel: at most ``_MAX_ANNOUNCE`` individually, followed by a
    summary line for the rest. Backend failures are stored in the monitor's
    "last_errors" and do not stop the other backends. The consecutive-error
    counter is reset when at least one backend succeeded and incremented
    otherwise. The monitor is persisted at the end.

    If the cached monitor is removed or replaced while a fetch is in flight
    (for example by ``!pastemoni del`` during background seeding), the cycle
    is abandoned without announcing or saving, so a deleted monitor is never
    written back to persistent state.

    Args:
        bot: Bot instance providing state, send() and shorten_url().
        key: Composite ``channel:name`` state key of the monitor.
        announce: When false, new items are only recorded as seen.
    """
    ps = _ps(bot)
    monitors = ps["monitors"]
    data = monitors.get(key)
    if data is None:
        data = _load(bot, key)
        if data is None:
            return
        monitors[key] = data

    def _superseded() -> bool:
        # True once this monitor was deleted or swapped for another dict
        # while we were awaiting.
        return monitors.get(key) is not data

    keyword = data["keyword"]
    data["last_poll"] = datetime.now(timezone.utc).isoformat()

    loop = asyncio.get_running_loop()
    had_success = False

    for tag, backend in _BACKENDS.items():
        failure = None
        items = []
        try:
            items = await loop.run_in_executor(None, backend, keyword)
        except Exception as exc:
            failure = exc

        # The fetch can take a long time; re-check before touching anything.
        if _superseded():
            return

        if failure is not None:
            _log.debug("pastemoni %s/%s error: %s", key, tag, failure)
            data.setdefault("last_errors", {})[tag] = str(failure)
            continue

        had_success = True
        data.setdefault("last_errors", {}).pop(tag, None)

        seen = data.setdefault("seen", {})
        seen_list = list(seen.get(tag, []))
        seen_set = set(seen_list)
        new_items = [item for item in items if item["id"] not in seen_set]

        if announce and new_items:
            channel = data["channel"]
            shown = new_items[:_MAX_ANNOUNCE]
            for item in shown:
                title = item.get("title") or "(untitled)"
                snippet = item.get("snippet", "")
                url = item.get("url", "")
                if url:
                    url = await bot.shorten_url(url)
                parts = [f"[{tag}] {title}"]
                if snippet:
                    parts.append(snippet)
                if url:
                    parts.append(url)
                await bot.send(channel, " -- ".join(parts))
            remaining = len(new_items) - len(shown)
            if remaining > 0:
                await bot.send(channel, f"[{tag}] ... and {remaining} more")

        seen_list.extend(item["id"] for item in new_items)
        # Keep only the most recent IDs so the stored state stays bounded.
        seen[tag] = seen_list[-_MAX_SEEN:]

    # Sending may also have yielded control; never resurrect a deleted monitor.
    if _superseded():
        return

    if had_success:
        ps["errors"][key] = 0
    else:
        ps["errors"][key] = ps["errors"].get(key, 0) + 1
    _save(bot, key, data)
async def _poll_loop(bot, key: str) -> None:
    """Poll one monitor forever, sleeping its interval between cycles.

    The loop ends when the monitor can no longer be found in the runtime
    cache or persisted state, or when the task is cancelled. After five or
    more consecutive failed cycles the delay is doubled, capped at
    ``_MAX_INTERVAL``.

    An unexpected exception from a poll cycle is logged and counted as a
    failure instead of ending the task, so one failed send or URL-shortening
    call does not silently stop the monitor.

    Args:
        bot: Bot instance the monitor belongs to.
        key: Composite ``channel:name`` state key of the monitor.
    """
    try:
        while True:
            ps = _ps(bot)
            data = ps["monitors"].get(key) or _load(bot, key)
            if data is None:
                return
            interval = data.get("interval", _DEFAULT_INTERVAL)
            if ps["errors"].get(key, 0) >= 5:
                interval = min(interval * 2, _MAX_INTERVAL)
            await asyncio.sleep(interval)
            try:
                await _poll_once(bot, key, announce=True)
            except Exception:
                # CancelledError is a BaseException and is not caught here.
                _log.exception("pastemoni %s: poll cycle failed", key)
                ps["errors"][key] = ps["errors"].get(key, 0) + 1
    except asyncio.CancelledError:
        pass
def _start_poller(bot, key: str) -> None:
    """Launch the poll loop for *key* unless one is already running.

    The task is stored in the per-bot "pollers" dict so it can be cancelled
    later.
    """
    pollers = _ps(bot)["pollers"]
    current = pollers.get(key)
    if not current or current.done():
        pollers[key] = asyncio.create_task(_poll_loop(bot, key))
def _stop_poller(bot, key: str) -> None:
    """Cancel the poller for *key* and forget its runtime state.

    Removes the task, the cached monitor data and the error counter.
    Missing entries are ignored.
    """
    state = _ps(bot)
    poller = state["pollers"].pop(key, None)
    if poller and not poller.done():
        poller.cancel()
    for bucket in ("monitors", "errors"):
        state[bucket].pop(key, None)
# -- Restore on connect -----------------------------------------------------
def _restore(bot) -> None:
    """Start a poller for every persisted monitor that is not being polled.

    Keys whose poller task is still running are left alone. Entries that
    cannot be loaded are skipped.
    """
    state = _ps(bot)
    pollers, monitors = state["pollers"], state["monitors"]
    for key in bot.state.keys("pastemoni"):
        running = pollers.get(key)
        if running and not running.done():
            continue
        saved = _load(bot, key)
        if saved is not None:
            monitors[key] = saved
            _start_poller(bot, key)
# IRC numeric 001 is the server's welcome reply, so this handler runs once
# per completed connection.
@event("001")
async def on_connect(bot, message):
    """Restore paste monitor pollers on connect.

    Delegates to _restore(), which skips monitors whose poller task is still
    running; *message* is not used.
    """
    _restore(bot)
# -- Command handler ---------------------------------------------------------
async def _pm_require_channel(bot, message) -> bool:
    """Reply with a hint and return False when *message* is not from a channel."""
    if message.is_channel:
        return True
    await bot.reply(message, "Use this command in a channel")
    return False


async def _pm_list(bot, message, parts) -> None:
    """Handle ``!pastemoni list``: show the monitors of the current channel."""
    if not await _pm_require_channel(bot, message):
        return
    prefix = f"{message.target}:"
    monitors = []
    for key in bot.state.keys("pastemoni"):
        if not key.startswith(prefix):
            continue
        data = _load(bot, key)
        if not data:
            continue
        # Fall back to the key suffix so one incomplete entry cannot break
        # the whole listing.
        name = data.get("name") or key[len(prefix):]
        keyword = data.get("keyword", "")
        errs = data.get("last_errors", {})
        entry = f"{name} ({keyword})"
        if errs:
            entry += f" [{len(errs)} errors]"
        monitors.append(entry)
    if not monitors:
        await bot.reply(message, "No monitors in this channel")
        return
    await bot.reply(message, f"Monitors: {', '.join(monitors)}")


async def _pm_check(bot, message, parts) -> None:
    """Handle ``!pastemoni check <name>``: poll one monitor immediately."""
    if not await _pm_require_channel(bot, message):
        return
    if len(parts) < 3:
        await bot.reply(message, "Usage: !pastemoni check <name>")
        return
    name = parts[2].lower()
    key = _state_key(message.target, name)
    data = _load(bot, key)
    if data is None:
        await bot.reply(message, f"No monitor '{name}' in this channel")
        return
    _ps(bot)["monitors"][key] = data
    await _poll_once(bot, key, announce=True)
    data = _ps(bot)["monitors"].get(key, data)
    errs = data.get("last_errors", {})
    if errs:
        tags = ", ".join(sorted(errs))
        await bot.reply(message, f"{name}: errors on {tags}")
    else:
        await bot.reply(message, f"{name}: checked")


async def _pm_add(bot, message, parts) -> None:
    """Handle ``!pastemoni add <name> <keyword>`` (admin only)."""
    if not bot._is_admin(message):
        await bot.reply(message, "Permission denied: add requires admin")
        return
    if not await _pm_require_channel(bot, message):
        return
    if len(parts) < 4:
        await bot.reply(message, "Usage: !pastemoni add <name> <keyword>")
        return

    name = parts[2].lower()
    keyword = parts[3]
    if not _validate_name(name):
        await bot.reply(
            message,
            "Invalid name (lowercase alphanumeric + hyphens, 1-20 chars)",
        )
        return

    channel = message.target
    key = _state_key(channel, name)
    if _load(bot, key) is not None:
        await bot.reply(
            message, f"Monitor '{name}' already exists in this channel",
        )
        return

    ch_prefix = f"{channel}:"
    count = sum(
        1 for k in bot.state.keys("pastemoni") if k.startswith(ch_prefix)
    )
    if count >= _MAX_MONITORS:
        await bot.reply(message, f"Monitor limit reached ({_MAX_MONITORS})")
        return

    now = datetime.now(timezone.utc).isoformat()
    data = {
        "keyword": keyword,
        "name": name,
        "channel": channel,
        "interval": _DEFAULT_INTERVAL,
        "added_by": message.nick,
        "added_at": now,
        "last_poll": now,
        "last_errors": {},
        "seen": {},
    }
    _save(bot, key, data)
    _ps(bot)["monitors"][key] = data

    async def _seed():
        # Record the current matches as seen without announcing them, then
        # start regular polling. A failed seed is logged and polling starts
        # anyway, so the monitor is not left idle.
        try:
            await _poll_once(bot, key, announce=False)
        except Exception:
            _log.exception("pastemoni %s: seeding failed", key)
        _start_poller(bot, key)

    # The event loop only keeps weak references to tasks; hold a strong one
    # until the seed finishes so it cannot be garbage-collected mid-run.
    seeds = _ps(bot).setdefault("seeds", set())
    seed_task = asyncio.create_task(_seed())
    seeds.add(seed_task)
    seed_task.add_done_callback(seeds.discard)

    await bot.reply(
        message,
        f"Monitor '{name}' added for: {keyword} (seeding in background)",
    )


async def _pm_del(bot, message, parts) -> None:
    """Handle ``!pastemoni del <name>`` (admin only)."""
    if not bot._is_admin(message):
        await bot.reply(message, "Permission denied: del requires admin")
        return
    if not await _pm_require_channel(bot, message):
        return
    if len(parts) < 3:
        await bot.reply(message, "Usage: !pastemoni del <name>")
        return
    name = parts[2].lower()
    key = _state_key(message.target, name)
    if _load(bot, key) is None:
        await bot.reply(message, f"No monitor '{name}' in this channel")
        return
    _stop_poller(bot, key)
    _delete(bot, key)
    await bot.reply(message, f"Removed '{name}'")


@command("pastemoni", help="Paste monitor: !pastemoni add|del|list|check")
async def cmd_pastemoni(bot, message):
    """Per-channel paste site keyword monitoring.

    Usage:
        !pastemoni add <name> <keyword>    Add monitor (admin)
        !pastemoni del <name>              Remove monitor (admin)
        !pastemoni list                    List monitors
        !pastemoni check <name>            Force-poll now

    The message text is split into at most four parts, so the keyword may
    contain spaces. Subcommands are case-insensitive; an unknown or missing
    subcommand gets the usage line as reply.
    """
    usage = "Usage: !pastemoni <add|del|list|check> [args]"
    parts = message.text.split(None, 3)
    if len(parts) < 2:
        await bot.reply(message, usage)
        return

    handlers = {
        "list": _pm_list,
        "check": _pm_check,
        "add": _pm_add,
        "del": _pm_del,
    }
    handler = handlers.get(parts[1].lower())
    if handler is None:
        await bot.reply(message, usage)
        return
    await handler(bot, message, parts)