feat: add wayback plugin (Wayback Machine lookup)
Query Wayback Machine availability API via urllib + executor. Supports optional timestamp parameter for date-targeted lookups. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
85
plugins/wayback.py
Normal file
85
plugins/wayback.py
Normal file
@@ -0,0 +1,85 @@
|
||||
"""Plugin: Wayback Machine snapshot lookup."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import urllib.request
|
||||
|
||||
from derp.plugin import command
|
||||
|
||||
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
|
||||
|
||||
# Base endpoint of the Wayback Machine availability API; _lookup appends the query string.
_API_URL = "https://archive.org/wayback/available"
|
||||
# Timeout passed to urllib.request.urlopen for the API request (seconds).
_TIMEOUT = 10
|
||||
# Value sent in the User-Agent header of the API request.
_USER_AGENT = "derp/1.0"
|
||||
|
||||
|
||||
def _lookup(url: str, timestamp: str = "") -> dict:
    """Blocking Wayback Machine availability check.

    Args:
        url: Address to look up; sent as the ``url`` query parameter.
        timestamp: Optional target date (the command help advertises
            ``YYYYMMDD``). When non-empty it is sent as the ``timestamp``
            query parameter.

    Returns:
        The decoded JSON body of the API response, or
        ``{"error": <message>}`` (message truncated to 100 characters) when
        the request, read, or JSON decoding fails.
    """
    # Function-scope import so this block does not depend on edits to the
    # module's import section.
    import urllib.parse

    query = {"url": url}
    if timestamp:
        query["timestamp"] = timestamp

    # urlencode percent-escapes the values, so characters such as '&', '#'
    # or spaces inside the looked-up URL cannot corrupt the query string.
    req = urllib.request.Request(
        f"{_API_URL}?{urllib.parse.urlencode(query)}",
        headers={"User-Agent": _USER_AGENT},
    )

    try:
        # The context manager closes the response even if read/decode raises.
        with urllib.request.urlopen(req, timeout=_TIMEOUT) as resp:
            return json.loads(resp.read().decode("utf-8"))
    except Exception as exc:
        # Deliberately best-effort: the caller reports this text to the user
        # instead of crashing the command handler.
        return {"error": str(exc)[:100]}
|
||||
|
||||
|
||||
@command("wayback", help="Wayback Machine: !wayback <url> [YYYYMMDD]")
async def cmd_wayback(bot, message):
    """Report the closest Wayback Machine snapshot for a URL.

    Usage:
        !wayback example.com
        !wayback https://example.com/page 20240101
    """
    args = message.text.split(maxsplit=3)
    if len(args) < 2:
        await bot.reply(message, "Usage: !wayback <url> [YYYYMMDD]")
        return

    # Bare hostnames/paths are assumed to be https.
    url = args[1]
    if not url.startswith(("http://", "https://")):
        url = f"https://{url}"
    timestamp = args[2] if len(args) >= 3 else ""

    # _lookup blocks on network I/O, so run it in the default executor.
    result = await asyncio.get_running_loop().run_in_executor(
        None, _lookup, url, timestamp
    )

    if "error" in result:
        await bot.reply(message, f"{url} -> error: {result['error']}")
        return

    closest = result.get("archived_snapshots", {}).get("closest")
    if not closest:
        await bot.reply(message, f"{url} -> no snapshots found")
        return

    snap_url = closest.get("url", "")
    raw_ts = closest.get("timestamp", "")
    snap_status = closest.get("status", "")
    is_available = closest.get("available", False)

    # Format timestamp: YYYYMMDDHHMMSS -> YYYY-MM-DD HH:MM:SS
    if len(raw_ts) >= 14:
        date_part = f"{raw_ts[:4]}-{raw_ts[4:6]}-{raw_ts[6:8]}"
        time_part = f"{raw_ts[8:10]}:{raw_ts[10:12]}:{raw_ts[12:14]}"
        ts_fmt = f"{date_part} {time_part}"
    else:
        ts_fmt = raw_ts

    if snap_status:
        status = f"{snap_status} "
    else:
        status = ""

    if is_available:
        avail = "available"
    else:
        avail = "unavailable"

    await bot.reply(message, f"{url} -> {avail} | {status}{ts_fmt} | {snap_url}")
|
||||
Reference in New Issue
Block a user