diff --git a/plugins/wayback.py b/plugins/wayback.py new file mode 100644 index 0000000..292cd51 --- /dev/null +++ b/plugins/wayback.py @@ -0,0 +1,85 @@ +"""Plugin: Wayback Machine snapshot lookup.""" + +from __future__ import annotations + +import asyncio +import json +import logging +import urllib.request + +from derp.plugin import command + +log = logging.getLogger(__name__) + +_API_URL = "https://archive.org/wayback/available" +_TIMEOUT = 10 +_USER_AGENT = "derp/1.0" + + +def _lookup(url: str, timestamp: str = "") -> dict: + """Blocking Wayback Machine availability check.""" + params = f"url={url}" + if timestamp: + params += f"×tamp={timestamp}" + + req = urllib.request.Request( + f"{_API_URL}?{params}", + headers={"User-Agent": _USER_AGENT}, + ) + + try: + resp = urllib.request.urlopen(req, timeout=_TIMEOUT) + data = json.loads(resp.read().decode("utf-8")) + resp.close() + return data + except Exception as exc: + return {"error": str(exc)[:100]} + + +@command("wayback", help="Wayback Machine: !wayback [YYYYMMDD]") +async def cmd_wayback(bot, message): + """Check Wayback Machine for archived snapshots. + + Usage: + !wayback example.com + !wayback https://example.com/page 20240101 + """ + parts = message.text.split(None, 3) + if len(parts) < 2: + await bot.reply(message, "Usage: !wayback [YYYYMMDD]") + return + + url = parts[1] + if not url.startswith(("http://", "https://")): + url = f"https://{url}" + + timestamp = parts[2] if len(parts) >= 3 else "" + + loop = asyncio.get_running_loop() + result = await loop.run_in_executor(None, _lookup, url, timestamp) + + if "error" in result: + await bot.reply(message, f"{url} -> error: {result['error']}") + return + + snapshots = result.get("archived_snapshots", {}) + closest = snapshots.get("closest") + + if not closest: + await bot.reply(message, f"{url} -> no snapshots found") + return + + snap_url = closest.get("url", "") + snap_ts = closest.get("timestamp", "") + snap_status = closest.get("status", "") + available = closest.get("available", False) + + # Format timestamp: YYYYMMDDHHMMSS -> YYYY-MM-DD HH:MM:SS + ts_fmt = snap_ts + if len(snap_ts) >= 14: + ts_fmt = (f"{snap_ts[:4]}-{snap_ts[4:6]}-{snap_ts[6:8]}" + f" {snap_ts[8:10]}:{snap_ts[10:12]}:{snap_ts[12:14]}") + + status = f"{snap_status} " if snap_status else "" + avail = "available" if available else "unavailable" + await bot.reply(message, f"{url} -> {avail} | {status}{ts_fmt} | {snap_url}")