Query Wayback Machine availability API via urllib + executor. Supports optional timestamp parameter for date-targeted lookups. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
86 lines
2.4 KiB
Python
86 lines
2.4 KiB
Python
"""Plugin: Wayback Machine snapshot lookup."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import json
|
|
import logging
|
|
import urllib.request
|
|
|
|
from derp.plugin import command
|
|
|
|
# Module-level logger, named after this plugin's module.
log: logging.Logger = logging.getLogger(__name__)

# Wayback Machine availability endpoint that every lookup is sent to.
_API_URL: str = "https://archive.org/wayback/available"
# Timeout, in seconds, handed to urlopen for each request.
_TIMEOUT: int = 10
# Value of the User-Agent header attached to each request.
_USER_AGENT: str = "derp/1.0"
|
|
|
|
|
|
def _lookup(url: str, timestamp: str = "") -> dict:
    """Blocking Wayback Machine availability check.

    Args:
        url: Address to look up; sent as the ``url`` query parameter.
        timestamp: Optional target date, sent as the ``timestamp`` query
            parameter. Left out of the request when empty.

    Returns:
        The decoded JSON object returned by the API, or
        ``{"error": <message>}`` (message truncated to 100 characters)
        when the request, the decoding, or the response shape is not
        usable. Network and parsing failures are reported through the
        ``error`` key rather than raised.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urlencode

    query = {"url": url}
    if timestamp:
        query["timestamp"] = timestamp

    # urlencode percent-escapes both values, so characters such as "&",
    # "#" or "?" inside the looked-up URL cannot corrupt the query string,
    # and the pair separator is always a literal "&".
    req = urllib.request.Request(
        f"{_API_URL}?{urlencode(query)}",
        headers={"User-Agent": _USER_AGENT},
    )

    try:
        # The context manager closes the response even if read/parse fails.
        with urllib.request.urlopen(req, timeout=_TIMEOUT) as resp:
            data = json.loads(resp.read().decode("utf-8"))
    except Exception as exc:
        # Deliberate best-effort boundary: the caller only needs a short,
        # printable reason, never a traceback.
        log.debug("Wayback lookup failed for %s: %s", url, exc)
        return {"error": str(exc)[:100]}

    # The caller treats the result as a mapping; guard against a JSON
    # array/scalar so it gets a clean error instead of an AttributeError.
    if not isinstance(data, dict):
        return {"error": "unexpected response format"}
    return data
|
|
|
|
|
|
def _format_snapshot_ts(snap_ts: str) -> str:
    """Render YYYYMMDDHHMMSS as YYYY-MM-DD HH:MM:SS.

    Values shorter than 14 characters are returned unchanged.
    """
    if len(snap_ts) < 14:
        return snap_ts
    return (f"{snap_ts[:4]}-{snap_ts[4:6]}-{snap_ts[6:8]}"
            f" {snap_ts[8:10]}:{snap_ts[10:12]}:{snap_ts[12:14]}")


@command("wayback", help="Wayback Machine: !wayback <url> [YYYYMMDD]")
async def cmd_wayback(bot, message):
    """Check Wayback Machine for archived snapshots.

    Usage:
        !wayback example.com
        !wayback https://example.com/page 20240101

    Reads the command from ``message.text`` and answers through
    ``bot.reply(message, ...)``. A URL without an ``http://`` or
    ``https://`` prefix gets ``https://`` prepended. The optional second
    argument must be 1-14 ASCII digits; anything else gets the usage
    message instead of being forwarded to the API.
    """
    usage = "Usage: !wayback <url> [YYYYMMDD]"

    parts = message.text.split(None, 3)
    if len(parts) < 2:
        await bot.reply(message, usage)
        return

    url = parts[1]
    if not url.startswith(("http://", "https://")):
        url = f"https://{url}"

    timestamp = parts[2] if len(parts) >= 3 else ""
    # The timestamp comes straight from chat input and ends up in a query
    # string, so only accept the digit-only form the API understands.
    if timestamp and not (
        timestamp.isascii() and timestamp.isdigit() and len(timestamp) <= 14
    ):
        await bot.reply(message, usage)
        return

    # _lookup blocks on network I/O; run it in the default executor so the
    # event loop stays responsive.
    loop = asyncio.get_running_loop()
    result = await loop.run_in_executor(None, _lookup, url, timestamp)

    if "error" in result:
        await bot.reply(message, f"{url} -> error: {result['error']}")
        return

    # "or {}" also covers an explicit null for archived_snapshots.
    snapshots = result.get("archived_snapshots") or {}
    closest = snapshots.get("closest")
    if not closest:
        await bot.reply(message, f"{url} -> no snapshots found")
        return

    snap_url = closest.get("url", "")
    snap_ts = closest.get("timestamp", "")
    snap_status = closest.get("status", "")
    available = closest.get("available", False)

    ts_fmt = _format_snapshot_ts(snap_ts)
    status = f"{snap_status} " if snap_status else ""
    avail = "available" if available else "unavailable"
    await bot.reply(message, f"{url} -> {avail} | {status}{ts_fmt} | {snap_url}")
|