Files
derp/plugins/wayback.py
user b48c289403 feat: add wayback plugin (Wayback Machine lookup)
Query Wayback Machine availability API via urllib + executor.
Supports optional timestamp parameter for date-targeted lookups.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 03:27:13 +01:00

86 lines
2.4 KiB
Python

"""Plugin: Wayback Machine snapshot lookup."""
from __future__ import annotations
import asyncio
import json
import logging
import urllib.request
from derp.plugin import command
# Module-level logger named after this module; not referenced by the code
# visible in this section.
log = logging.getLogger(__name__)
# Base endpoint of the Wayback Machine availability API; the query string
# (url / timestamp) is appended to it by _lookup().
_API_URL = "https://archive.org/wayback/available"
# Value passed as ``timeout=`` to urllib.request.urlopen() (seconds).
_TIMEOUT = 10
# Sent as the User-Agent request header on every API call.
_USER_AGENT = "derp/1.0"
def _lookup(url: str, timestamp: str = "") -> dict:
    """Blocking Wayback Machine availability check.

    Intended to be run in an executor, since it performs a synchronous
    HTTP request.

    Args:
        url: Address of the page to look up.
        timestamp: Optional target date (``YYYYMMDD[hhmmss]``). It is sent
            as the ``timestamp`` query parameter only when non-empty.

    Returns:
        The decoded JSON object on success. On any failure (network error,
        timeout, invalid JSON, non-object payload) a dict of the form
        ``{"error": <message>}`` with the message cut to 100 characters.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urlencode

    query = {"url": url}
    if timestamp:
        query["timestamp"] = timestamp

    # urlencode() percent-escapes both values, so characters such as "&",
    # "#", "+" or spaces inside the looked-up URL cannot corrupt or extend
    # the API query string.
    req = urllib.request.Request(
        f"{_API_URL}?{urlencode(query)}",
        headers={"User-Agent": _USER_AGENT},
    )
    try:
        # The context manager closes the response even when read() or
        # JSON decoding raises.
        with urllib.request.urlopen(req, timeout=_TIMEOUT) as resp:
            data = json.loads(resp.read().decode("utf-8"))
    except Exception as exc:
        # Deliberate best-effort: every failure is reported to the caller
        # as a short error string instead of propagating.
        return {"error": str(exc)[:100]}

    # Callers use dict methods on the result; never hand back a list,
    # string or null decoded from an unexpected payload.
    if not isinstance(data, dict):
        return {"error": "unexpected response format"}
    return data
@command("wayback", help="Wayback Machine: !wayback <url> [YYYYMMDD]")
async def cmd_wayback(bot, message):
    """Check Wayback Machine for archived snapshots.

    Usage:
        !wayback example.com
        !wayback https://example.com/page 20240101

    The first argument is the URL (``https://`` is prepended when no
    http/https scheme is given); the optional second argument is a target
    timestamp of 1-14 digits. The outcome is sent with ``bot.reply``;
    nothing is returned.
    """
    usage = "Usage: !wayback <url> [YYYYMMDD]"

    parts = message.text.split(None, 3)
    if len(parts) < 2:
        await bot.reply(message, usage)
        return

    url = parts[1]
    if not url.startswith(("http://", "https://")):
        url = f"https://{url}"

    timestamp = parts[2] if len(parts) >= 3 else ""
    # The timestamp is raw chat input that ends up in the API query string.
    # Accept only 1-14 ASCII digits (YYYYMMDDhhmmss prefix) so it cannot
    # smuggle extra query parameters or garbage to the API.
    if timestamp and not (
        timestamp.isascii() and timestamp.isdigit() and len(timestamp) <= 14
    ):
        await bot.reply(message, usage)
        return

    # _lookup() blocks on network I/O, so run it in the default executor
    # instead of stalling the event loop.
    loop = asyncio.get_running_loop()
    result = await loop.run_in_executor(None, _lookup, url, timestamp)

    if not isinstance(result, dict):
        await bot.reply(message, f"{url} -> error: unexpected response")
        return
    if "error" in result:
        await bot.reply(message, f"{url} -> error: {result['error']}")
        return

    # "archived_snapshots" may be absent, null or empty; all of these mean
    # there is nothing to report.
    snapshots = result.get("archived_snapshots")
    closest = snapshots.get("closest") if isinstance(snapshots, dict) else None
    if not closest or not isinstance(closest, dict):
        await bot.reply(message, f"{url} -> no snapshots found")
        return

    snap_url = closest.get("url", "")
    snap_ts = closest.get("timestamp", "")
    snap_status = closest.get("status", "")
    available = closest.get("available", False)

    # Format timestamp: YYYYMMDDHHMMSS -> YYYY-MM-DD HH:MM:SS
    ts_fmt = snap_ts
    if len(snap_ts) >= 14:
        ts_fmt = (
            f"{snap_ts[:4]}-{snap_ts[4:6]}-{snap_ts[6:8]}"
            f" {snap_ts[8:10]}:{snap_ts[10:12]}:{snap_ts[12:14]}"
        )

    status = f"{snap_status} " if snap_status else ""
    avail = "available" if available else "unavailable"
    await bot.reply(message, f"{url} -> {avail} | {status}{ts_fmt} | {snap_url}")