Connect to multiple IRC servers concurrently from a single config file. Plugins are loaded once and shared; per-server state is isolated via separate SQLite databases and per-bot runtime state (bot._pstate). - Add build_server_configs() for [servers.*] config layout - Bot.__init__ gains name parameter, _pstate dict for plugin isolation - cli.py runs multiple bots via asyncio.gather - 9 stateful plugins migrated from module-level dicts to _ps(bot) pattern - Backward compatible: legacy [server] config works unchanged Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
532 lines
16 KiB
Python
532 lines
16 KiB
Python
"""Plugin: paste site keyword monitor for Pastebin and GitHub Gists."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import json
|
|
import logging
|
|
import re
|
|
import urllib.request
|
|
from datetime import datetime, timezone
|
|
from html.parser import HTMLParser
|
|
|
|
from derp.http import urlopen as _urlopen
|
|
from derp.plugin import command, event
|
|
|
|
_log = logging.getLogger(__name__)
|
|
|
|
# -- Constants ---------------------------------------------------------------
|
|
|
|
# Allowed monitor names: one lowercase alphanumeric followed by up to 19
# lowercase alphanumerics or hyphens (1-20 characters in total).
_NAME_RE = re.compile(r"^[a-z0-9][a-z0-9-]{0,19}$")
# Upper bound on remembered item ids per backend, per monitor.
_MAX_SEEN = 200
# Maximum number of new items announced individually per backend per poll;
# the rest are summarised in a single "... and N more" line.
_MAX_ANNOUNCE = 5
# Polling interval stored on newly added monitors (passed to asyncio.sleep).
_DEFAULT_INTERVAL = 300
# Ceiling applied to the doubled interval used after repeated failures.
_MAX_INTERVAL = 3600
# Timeout handed to every outgoing HTTP request.
_FETCH_TIMEOUT = 15
# User-Agent header sent with every outgoing HTTP request.
_USER_AGENT = "derp-bot/1.0 (IRC paste monitor)"
# Maximum number of monitors per channel.
_MAX_MONITORS = 20
# Default window size for snippets built around a keyword match.
_MAX_SNIPPET_LEN = 80
# Default maximum length of an announced title.
_MAX_TITLE_LEN = 60
|
|
|
|
# -- Per-bot runtime state ---------------------------------------------------
|
|
|
|
def _ps(bot):
|
|
"""Per-bot plugin runtime state."""
|
|
return bot._pstate.setdefault("pastemoni", {
|
|
"pollers": {},
|
|
"monitors": {},
|
|
"errors": {},
|
|
})
|
|
|
|
|
|
# -- Pure helpers ------------------------------------------------------------
|
|
|
|
def _state_key(channel: str, name: str) -> str:
|
|
"""Build composite state key."""
|
|
return f"{channel}:{name}"
|
|
|
|
|
|
def _validate_name(name: str) -> bool:
    """Return True when *name* matches the allowed monitor-name pattern.

    ``fullmatch`` is used rather than ``match`` because ``$`` also matches
    just before a trailing newline, which would let a name such as
    ``"abc\\n"`` through.
    """
    return _NAME_RE.fullmatch(name) is not None
|
|
|
|
|
|
def _truncate(text: str, max_len: int = _MAX_TITLE_LEN) -> str:
    """Return *text* as-is when it fits in *max_len*, else an elided prefix.

    The elided form is the first ``max_len - 3`` characters with trailing
    whitespace removed, followed by ``...``.
    """
    if len(text) > max_len:
        head = text[: max_len - 3].rstrip()
        return f"{head}..."
    return text
|
|
|
|
|
|
def _snippet_around(text: str, keyword: str, max_len: int = _MAX_SNIPPET_LEN) -> str:
    """Return a whitespace-collapsed excerpt of *text* near *keyword*.

    Text that already fits in *max_len* is returned whole.  When the
    keyword is not found (case-insensitive search) the excerpt is the
    start of the text.  Otherwise the window begins about a third of
    *max_len* before the match, and ``...`` marks each side that was cut.
    """
    if not text:
        return ""

    flat = " ".join(text.split())  # collapse runs of whitespace
    if len(flat) <= max_len:
        return flat

    pos = flat.lower().find(keyword.lower())
    if pos < 0:
        return flat[: max_len - 3] + "..."

    lo = max(0, pos - max_len // 3)
    hi = min(len(flat), lo + max_len)
    lead = "..." if lo > 0 else ""
    trail = "..." if hi < len(flat) else ""
    return f"{lead}{flat[lo:hi]}{trail}"
|
|
|
|
|
|
# -- State helpers -----------------------------------------------------------
|
|
|
|
def _save(bot, key: str, data: dict) -> None:
|
|
"""Persist monitor data to bot.state."""
|
|
bot.state.set("pastemoni", key, json.dumps(data))
|
|
|
|
|
|
def _load(bot, key: str) -> dict | None:
|
|
"""Load monitor data from bot.state."""
|
|
raw = bot.state.get("pastemoni", key)
|
|
if raw is None:
|
|
return None
|
|
try:
|
|
return json.loads(raw)
|
|
except json.JSONDecodeError:
|
|
return None
|
|
|
|
|
|
def _delete(bot, key: str) -> None:
|
|
"""Remove monitor data from bot.state."""
|
|
bot.state.delete("pastemoni", key)
|
|
|
|
|
|
# -- Pastebin archive parser ------------------------------------------------
|
|
|
|
class _ArchiveParser(HTMLParser):
|
|
"""Extract paste links from Pastebin archive HTML."""
|
|
|
|
def __init__(self):
|
|
super().__init__()
|
|
self.links: list[tuple[str, str]] = [] # (paste_id, title)
|
|
self._in_link = False
|
|
self._href = ""
|
|
self._title_parts: list[str] = []
|
|
|
|
def handle_starttag(self, tag, attrs):
|
|
if tag != "a":
|
|
return
|
|
attr_map = {k: (v or "") for k, v in attrs}
|
|
href = attr_map.get("href", "")
|
|
if re.match(r"^/[a-zA-Z0-9]{8}$", href):
|
|
self._in_link = True
|
|
self._href = href[1:] # strip leading /
|
|
self._title_parts = []
|
|
|
|
def handle_data(self, data):
|
|
if self._in_link:
|
|
self._title_parts.append(data)
|
|
|
|
def handle_endtag(self, tag):
|
|
if tag == "a" and self._in_link:
|
|
self._in_link = False
|
|
title = "".join(self._title_parts).strip()
|
|
if self._href:
|
|
self.links.append((self._href, title))
|
|
|
|
|
|
# -- Pastebin backend --------------------------------------------------------
|
|
|
|
def _fetch_pastebin(keyword: str) -> list[dict]:
    """Scrape the Pastebin archive and return pastes matching *keyword*.

    Blocking: performs network I/O, so run it in an executor rather than
    directly on the event loop.

    The first 30 paste links found on the archive page are checked.  A
    paste matches when the keyword occurs (case-insensitively) in its
    title; otherwise its raw body is fetched and searched.

    Args:
        keyword: Text to search for.

    Returns:
        One dict per match with the keys ``id``, ``title``, ``url`` and
        ``snippet`` (``snippet`` is empty for title matches).

    Raises:
        Exception: whatever the archive request raises.  Failures on the
            individual raw-paste requests are logged and skipped.
    """
    req = urllib.request.Request("https://pastebin.com/archive", method="GET")
    req.add_header("User-Agent", _USER_AGENT)

    resp = _urlopen(req, timeout=_FETCH_TIMEOUT)
    try:
        raw = resp.read()
    finally:
        # Release the connection even when the read fails part-way.
        resp.close()

    html = raw.decode("utf-8", errors="replace")
    parser = _ArchiveParser()
    parser.feed(html)

    kw_lower = keyword.lower()
    results: list[dict] = []

    for paste_id, title in parser.links[:30]:
        # Check the title first: a hit avoids the raw fetch entirely.
        if kw_lower in title.lower():
            results.append({
                "id": paste_id,
                "title": _truncate(title, _MAX_TITLE_LEN),
                "url": f"https://pastebin.com/{paste_id}",
                "snippet": "",
            })
            continue

        # Fetch the raw content and search it.
        try:
            raw_req = urllib.request.Request(
                f"https://pastebin.com/raw/{paste_id}", method="GET",
            )
            raw_req.add_header("User-Agent", _USER_AGENT)
            raw_resp = _urlopen(raw_req, timeout=_FETCH_TIMEOUT)
            try:
                content = raw_resp.read().decode("utf-8", errors="replace")
            finally:
                raw_resp.close()
        except Exception as exc:
            # Best effort: one unreadable paste must not abort the scan,
            # but leave a trace for debugging.
            _log.debug("pastemoni pb raw fetch %s failed: %s", paste_id, exc)
            continue

        if kw_lower in content.lower():
            results.append({
                "id": paste_id,
                "title": _truncate(title or "(untitled)", _MAX_TITLE_LEN),
                "url": f"https://pastebin.com/{paste_id}",
                "snippet": _snippet_around(content, keyword),
            })

    return results
|
|
|
|
|
|
# -- GitHub Gists backend ----------------------------------------------------
|
|
|
|
def _fetch_gists(keyword: str) -> list[dict]:
    """Query GitHub's public gist feed and return gists matching *keyword*.

    Blocking: performs network I/O, so run it in an executor rather than
    directly on the event loop.

    A gist matches when the keyword occurs (case-insensitively) in its
    description or in any of its file names; file contents are not
    fetched.

    Args:
        keyword: Text to search for.

    Returns:
        One dict per match with the keys ``id``, ``title``, ``url`` and
        ``snippet``.  An empty list when the response is not a JSON list.

    Raises:
        Exception: whatever the request or JSON decoding raises.
    """
    req = urllib.request.Request(
        "https://api.github.com/gists/public?per_page=30", method="GET",
    )
    req.add_header("User-Agent", _USER_AGENT)
    req.add_header("Accept", "application/vnd.github+json")

    resp = _urlopen(req, timeout=_FETCH_TIMEOUT)
    try:
        raw = resp.read()
    finally:
        # Release the connection even when the read fails part-way.
        resp.close()

    gists = json.loads(raw)
    if not isinstance(gists, list):
        return []

    kw_lower = keyword.lower()
    results: list[dict] = []

    for gist in gists:
        # Skip malformed entries instead of failing the whole poll.
        if not isinstance(gist, dict):
            continue
        gist_id = gist.get("id", "")
        if not gist_id:
            continue
        description = gist.get("description") or ""
        html_url = gist.get("html_url") or ""
        files = gist.get("files") or {}
        filenames = " ".join(files) if isinstance(files, dict) else ""

        searchable = f"{description} {filenames}"
        if kw_lower not in searchable.lower():
            continue

        source = description or filenames
        title = _truncate(source or "(no description)", _MAX_TITLE_LEN)
        # Only add a snippet when the title had to be cut short.
        snippet = _snippet_around(source, keyword) if len(source) > _MAX_TITLE_LEN else ""
        results.append({
            "id": gist_id,
            "title": title,
            "url": html_url,
            "snippet": snippet,
        })

    return results
|
|
|
|
|
|
# -- Backend registry -------------------------------------------------------
|
|
|
|
# Maps a short backend tag to its blocking fetch function.  The tag is
# shown in announcements as "[tag]" and is also the key under each
# monitor's "seen" and "last_errors" entries.  Every function is called
# with the keyword and returns a list of result dicts.
_BACKENDS: dict[str, callable] = {
    "pb": _fetch_pastebin,
    "gh": _fetch_gists,
}
|
|
|
|
|
|
# -- Polling -----------------------------------------------------------------
|
|
|
|
async def _poll_once(bot, key: str, announce: bool = True) -> None:
    """Run one poll cycle for the monitor stored under *key*.

    Every backend in ``_BACKENDS`` is queried in the default executor.
    Items whose id is not in the monitor's per-backend "seen" list are
    treated as new: when *announce* is true they are sent to the monitor's
    channel (at most ``_MAX_ANNOUNCE`` per backend, followed by a summary
    line) and their ids are remembered, keeping the newest ``_MAX_SEEN``.

    Backend failures are recorded under ``last_errors``.  The consecutive
    failure counter is reset when at least one backend succeeded and
    incremented otherwise.  The updated record is persisted at the end.

    If the monitor is removed from the runtime state while this coroutine
    is suspended, the cycle stops without saving, so a deleted monitor is
    not written back.

    Args:
        bot: Bot whose state and channel connection are used.
        key: Composite ``<channel>:<name>`` monitor key.
        announce: When false, new items are only recorded as seen.
    """
    ps = _ps(bot)
    monitors = ps["monitors"]
    data = monitors.get(key)
    if data is None:
        data = _load(bot, key)
        if data is None:
            return
        monitors[key] = data

    keyword = data["keyword"]
    data["last_poll"] = datetime.now(timezone.utc).isoformat()

    loop = asyncio.get_running_loop()
    had_success = False

    for tag, backend in _BACKENDS.items():
        try:
            items = await loop.run_in_executor(None, backend, keyword)
        except Exception as exc:
            _log.debug("pastemoni %s/%s error: %s", key, tag, exc)
            data.setdefault("last_errors", {})[tag] = str(exc)
            continue

        # The monitor may have been deleted while the fetch was running.
        if key not in monitors:
            return

        had_success = True
        data.setdefault("last_errors", {}).pop(tag, None)

        seen_list = list(data.get("seen", {}).get(tag, []))
        seen_set = set(seen_list)
        # Collect unseen items, dropping repeats within this batch so the
        # same paste is neither announced nor stored twice.
        new_items = []
        for item in items:
            item_id = item["id"]
            if item_id not in seen_set:
                seen_set.add(item_id)
                new_items.append(item)

        if announce and new_items:
            channel = data["channel"]
            shown = new_items[:_MAX_ANNOUNCE]
            for item in shown:
                title = item.get("title") or "(untitled)"
                snippet = item.get("snippet", "")
                url = item.get("url", "")
                if url:
                    url = await bot.shorten_url(url)
                parts = [f"[{tag}] {title}"]
                if snippet:
                    parts.append(snippet)
                if url:
                    parts.append(url)
                await bot.send(channel, " -- ".join(parts))
            remaining = len(new_items) - len(shown)
            if remaining > 0:
                await bot.send(channel, f"[{tag}] ... and {remaining} more")

        seen_list.extend(item["id"] for item in new_items)
        # Keep only the most recent ids.
        data.setdefault("seen", {})[tag] = seen_list[-_MAX_SEEN:]

    # Do not resurrect a monitor that was deleted during the awaits above.
    if key not in monitors:
        return

    errors = ps["errors"]
    errors[key] = 0 if had_success else errors.get(key, 0) + 1

    monitors[key] = data
    _save(bot, key, data)
|
|
|
|
|
|
async def _poll_loop(bot, key: str) -> None:
    """Poll the monitor stored under *key* until it disappears or is cancelled.

    Each round sleeps for the monitor's interval and then runs one poll
    cycle.  Once the consecutive-failure counter reaches five, the sleep
    is doubled, capped at ``_MAX_INTERVAL``.  The loop returns when the
    monitor can no longer be found, and exits quietly on cancellation.

    An unexpected exception from a poll cycle is logged and the loop
    carries on; otherwise the task would end for good with its exception
    never reported, since the task object is kept in the poller table.
    """
    try:
        while True:
            ps = _ps(bot)
            data = ps["monitors"].get(key) or _load(bot, key)
            if data is None:
                return
            interval = data.get("interval", _DEFAULT_INTERVAL)
            if ps["errors"].get(key, 0) >= 5:
                # Back off while every backend keeps failing.
                interval = min(interval * 2, _MAX_INTERVAL)
            await asyncio.sleep(interval)
            try:
                await _poll_once(bot, key, announce=True)
            except asyncio.CancelledError:
                raise
            except Exception:
                _log.exception("pastemoni %s: poll cycle failed", key)
    except asyncio.CancelledError:
        pass
|
|
|
|
|
|
def _start_poller(bot, key: str) -> None:
    """Launch the poll loop for *key* unless one is already running.

    The new task is recorded in the per-bot ``pollers`` table.
    """
    pollers = _ps(bot)["pollers"]
    current = pollers.get(key)
    if not current or current.done():
        pollers[key] = asyncio.create_task(_poll_loop(bot, key))
|
|
|
|
|
|
def _stop_poller(bot, key: str) -> None:
    """Cancel the poller for *key* and forget its runtime state.

    Removes the task from ``pollers`` (cancelling it when still running)
    and drops the cached monitor record and failure counter.
    """
    state = _ps(bot)
    poller = state["pollers"].pop(key, None)
    if poller and not poller.done():
        poller.cancel()
    for bucket in ("monitors", "errors"):
        state[bucket].pop(key, None)
|
|
|
|
|
|
# -- Restore on connect -----------------------------------------------------
|
|
|
|
def _restore(bot) -> None:
    """Start a poller for every persisted monitor that lacks a live one.

    Monitors whose stored record cannot be loaded are skipped.
    """
    state = _ps(bot)
    pollers, monitors = state["pollers"], state["monitors"]
    for key in bot.state.keys("pastemoni"):
        running = pollers.get(key)
        if running and not running.done():
            continue
        record = _load(bot, key)
        if record is not None:
            monitors[key] = record
            _start_poller(bot, key)
|
|
|
|
|
|
@event("001")
async def on_connect(bot, message):
    """Restore paste monitor pollers on connect.

    Registered for the "001" event; *message* is not used.  Delegates to
    ``_restore``, which leaves monitors with a still-running poller alone.
    """
    _restore(bot)
|
|
|
|
|
|
# -- Command handler ---------------------------------------------------------
|
|
|
|
async def _pastemoni_list(bot, message) -> None:
    """Reply with the monitors configured for the current channel."""
    if not message.is_channel:
        await bot.reply(message, "Use this command in a channel")
        return

    prefix = f"{message.target}:"
    monitors = []
    for key in bot.state.keys("pastemoni"):
        if not key.startswith(prefix):
            continue
        data = _load(bot, key)
        if not data:
            continue
        # Fall back to the key's name part so a record without "name"
        # cannot break the listing.
        name = data.get("name") or key[len(prefix):]
        keyword = data.get("keyword", "")
        entry = f"{name} ({keyword})"
        errs = data.get("last_errors", {})
        if errs:
            entry += f" [{len(errs)} errors]"
        monitors.append(entry)

    if not monitors:
        await bot.reply(message, "No monitors in this channel")
        return
    await bot.reply(message, f"Monitors: {', '.join(monitors)}")


async def _pastemoni_check(bot, message, parts: list[str]) -> None:
    """Force an immediate poll of one monitor and report the outcome."""
    if not message.is_channel:
        await bot.reply(message, "Use this command in a channel")
        return
    if len(parts) < 3:
        await bot.reply(message, "Usage: !pastemoni check <name>")
        return

    name = parts[2].lower()
    key = _state_key(message.target, name)
    data = _load(bot, key)
    if data is None:
        await bot.reply(message, f"No monitor '{name}' in this channel")
        return

    _ps(bot)["monitors"][key] = data
    await _poll_once(bot, key, announce=True)
    data = _ps(bot)["monitors"].get(key, data)
    errs = data.get("last_errors", {})
    if errs:
        tags = ", ".join(sorted(errs))
        await bot.reply(message, f"{name}: errors on {tags}")
    else:
        await bot.reply(message, f"{name}: checked")


async def _pastemoni_add(bot, message, parts: list[str]) -> None:
    """Create a monitor in the current channel and seed it in the background."""
    if not bot._is_admin(message):
        await bot.reply(message, "Permission denied: add requires admin")
        return
    if not message.is_channel:
        await bot.reply(message, "Use this command in a channel")
        return
    if len(parts) < 4:
        await bot.reply(message, "Usage: !pastemoni add <name> <keyword>")
        return

    name = parts[2].lower()
    # split(None, 3) keeps trailing whitespace on the last field; a keyword
    # ending in spaces would otherwise fail to match anything sensible.
    keyword = parts[3].strip()

    if not _validate_name(name):
        await bot.reply(
            message,
            "Invalid name (lowercase alphanumeric + hyphens, 1-20 chars)",
        )
        return

    channel = message.target
    key = _state_key(channel, name)

    if _load(bot, key) is not None:
        await bot.reply(
            message, f"Monitor '{name}' already exists in this channel",
        )
        return

    ch_prefix = f"{channel}:"
    count = sum(
        1 for k in bot.state.keys("pastemoni") if k.startswith(ch_prefix)
    )
    if count >= _MAX_MONITORS:
        await bot.reply(message, f"Monitor limit reached ({_MAX_MONITORS})")
        return

    now = datetime.now(timezone.utc).isoformat()
    data = {
        "keyword": keyword,
        "name": name,
        "channel": channel,
        "interval": _DEFAULT_INTERVAL,
        "added_by": message.nick,
        "added_at": now,
        "last_poll": now,
        "last_errors": {},
        "seen": {},
    }
    _save(bot, key, data)
    ps = _ps(bot)
    ps["monitors"][key] = data

    async def _seed():
        # Record what is currently visible without announcing it, then
        # begin regular polling.
        await _poll_once(bot, key, announce=False)
        _start_poller(bot, key)

    # The event loop keeps only weak references to tasks, so hold a strong
    # one until the seed finishes or it may be garbage-collected mid-run.
    seed_tasks = ps.setdefault("seed_tasks", set())
    seed_task = asyncio.create_task(_seed())
    seed_tasks.add(seed_task)
    seed_task.add_done_callback(seed_tasks.discard)

    await bot.reply(
        message,
        f"Monitor '{name}' added for: {keyword} (seeding in background)",
    )


async def _pastemoni_del(bot, message, parts: list[str]) -> None:
    """Stop and remove one monitor from the current channel."""
    if not bot._is_admin(message):
        await bot.reply(message, "Permission denied: del requires admin")
        return
    if not message.is_channel:
        await bot.reply(message, "Use this command in a channel")
        return
    if len(parts) < 3:
        await bot.reply(message, "Usage: !pastemoni del <name>")
        return

    name = parts[2].lower()
    key = _state_key(message.target, name)

    if _load(bot, key) is None:
        await bot.reply(message, f"No monitor '{name}' in this channel")
        return

    _stop_poller(bot, key)
    _delete(bot, key)
    await bot.reply(message, f"Removed '{name}'")


@command("pastemoni", help="Paste monitor: !pastemoni add|del|list|check")
async def cmd_pastemoni(bot, message):
    """Per-channel paste site keyword monitoring.

    Usage:
        !pastemoni add <name> <keyword>  Add monitor (admin)
        !pastemoni del <name>            Remove monitor (admin)
        !pastemoni list                  List monitors
        !pastemoni check <name>          Force-poll now
    """
    # At most four fields: command, subcommand, name, keyword (which may
    # itself contain spaces).
    parts = message.text.split(None, 3)
    if len(parts) < 2:
        await bot.reply(message, "Usage: !pastemoni <add|del|list|check> [args]")
        return

    sub = parts[1].lower()
    if sub == "list":
        await _pastemoni_list(bot, message)
    elif sub == "check":
        await _pastemoni_check(bot, message, parts)
    elif sub == "add":
        await _pastemoni_add(bot, message, parts)
    elif sub == "del":
        await _pastemoni_del(bot, message, parts)
    else:
        await bot.reply(message, "Usage: !pastemoni <add|del|list|check> [args]")
|