canary: generate realistic fake credentials (token/aws/basic) for planting as canary tripwires. Per-channel state persistence. tcping: TCP connect latency probe through SOCKS5 proxy with min/avg/max reporting. Proxy-compatible alternative to traceroute. archive: save URLs to Wayback Machine via Save Page Now API, routed through SOCKS5 proxy. resolve: bulk DNS resolution (up to 10 hosts) via TCP DNS through SOCKS5 proxy with concurrent asyncio.gather. 83 new tests (1010 total), docs updated.
106 lines
3.4 KiB
Python
"""Plugin: Wayback Machine Save Page Now (SOCKS5-proxied)."""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import logging
|
|
import urllib.error
|
|
import urllib.request
|
|
|
|
from derp.http import urlopen as _urlopen
|
|
from derp.plugin import command
|
|
|
|
log = logging.getLogger(__name__)

# Save Page Now endpoint; the URL to archive is appended directly after it.
_SAVE_URL = "https://web.archive.org/save/"
# Seconds to wait on the save request — SPN is often slow under load.
_TIMEOUT = 30
# Identifying User-Agent sent with every save request.
_USER_AGENT = "derp/1.0"
|
|
|
|
|
|
def _save_page(url: str) -> dict:
|
|
"""Blocking POST to Save Page Now. Returns result dict."""
|
|
target = f"{_SAVE_URL}{url}"
|
|
req = urllib.request.Request(
|
|
target,
|
|
headers={"User-Agent": _USER_AGENT},
|
|
)
|
|
|
|
try:
|
|
resp = _urlopen(req, timeout=_TIMEOUT)
|
|
# The save endpoint returns a redirect to the archived page.
|
|
# With urllib3 pooled requests, redirects are followed automatically.
|
|
final_url = getattr(resp, "geturl", lambda: None)()
|
|
headers = resp.headers if hasattr(resp, "headers") else {}
|
|
|
|
# Check for Content-Location or Link header with archived URL
|
|
content_location = None
|
|
if hasattr(headers, "get"):
|
|
content_location = headers.get("Content-Location", "")
|
|
link = headers.get("Link", "")
|
|
else:
|
|
content_location = ""
|
|
link = ""
|
|
|
|
resp.read()
|
|
|
|
# Try Content-Location first (most reliable)
|
|
if content_location and "/web/" in content_location:
|
|
if content_location.startswith("/"):
|
|
return {"url": f"https://web.archive.org{content_location}"}
|
|
return {"url": content_location}
|
|
|
|
# Try final URL after redirects
|
|
if final_url and "/web/" in final_url:
|
|
return {"url": final_url}
|
|
|
|
# Try Link header
|
|
if link and "/web/" in link:
|
|
# Extract URL from Link header: <url>; rel="memento"
|
|
for part in link.split(","):
|
|
part = part.strip()
|
|
if "/web/" in part and "<" in part:
|
|
extracted = part.split("<", 1)[1].split(">", 1)[0]
|
|
return {"url": extracted}
|
|
|
|
# If we got a 200 but no archive URL, report success without link
|
|
return {"url": f"https://web.archive.org/web/*/{url}"}
|
|
|
|
except urllib.error.HTTPError as exc:
|
|
if exc.code == 429:
|
|
return {"error": "rate limited -- try again later"}
|
|
if exc.code == 523:
|
|
return {"error": "origin unreachable"}
|
|
return {"error": f"HTTP {exc.code}"}
|
|
except (TimeoutError, OSError) as exc:
|
|
return {"error": f"timeout: {exc}"}
|
|
except Exception as exc:
|
|
return {"error": str(exc)[:100]}
|
|
|
|
|
|
@command("archive", help="Save to Wayback Machine: !archive <url>")
async def cmd_archive(bot, message):
    """Save a URL to the Wayback Machine via Save Page Now.

    Usage:
        !archive https://example.com/page
    """
    tokens = message.text.split(None, 1)
    if len(tokens) < 2:
        await bot.reply(message, "Usage: !archive <url>")
        return

    url = tokens[1].strip()
    if not (url.startswith("http://") or url.startswith("https://")):
        await bot.reply(message, "URL must start with http:// or https://")
        return

    await bot.reply(message, f"Archiving {url}...")

    # The save call is blocking urllib I/O; run it on the default thread
    # pool so the event loop stays responsive.
    result = await asyncio.get_running_loop().run_in_executor(
        None, _save_page, url
    )

    reply = (
        f"Archive failed: {result['error']}"
        if "error" in result
        else f"Archived: {result['url']}"
    )
    await bot.reply(message, reply)
|