feat: add canary, tcping, archive, resolve plugins
canary: generate realistic fake credentials (token/aws/basic) for planting as canary tripwires. Per-channel state persistence. tcping: TCP connect latency probe through SOCKS5 proxy with min/avg/max reporting. Proxy-compatible alternative to traceroute. archive: save URLs to Wayback Machine via Save Page Now API, routed through SOCKS5 proxy. resolve: bulk DNS resolution (up to 10 hosts) via TCP DNS through SOCKS5 proxy with concurrent asyncio.gather. 83 new tests (1010 total), docs updated.
This commit is contained in:
105
plugins/archive.py
Normal file
105
plugins/archive.py
Normal file
@@ -0,0 +1,105 @@
|
||||
"""Plugin: Wayback Machine Save Page Now (SOCKS5-proxied)."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import urllib.error
|
||||
import urllib.request
|
||||
|
||||
from derp.http import urlopen as _urlopen
|
||||
from derp.plugin import command
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
_SAVE_URL = "https://web.archive.org/save/"
|
||||
_TIMEOUT = 30
|
||||
_USER_AGENT = "derp/1.0"
|
||||
|
||||
|
||||
def _save_page(url: str) -> dict:
    """Blocking GET against Save Page Now for *url*.

    Returns ``{"url": <archived url>}`` on success or
    ``{"error": <human-readable reason>}`` on failure. Designed to run in
    an executor thread, so it never raises.
    """
    target = f"{_SAVE_URL}{url}"
    req = urllib.request.Request(
        target,
        headers={"User-Agent": _USER_AGENT},
    )

    try:
        resp = _urlopen(req, timeout=_TIMEOUT)
        try:
            # The save endpoint redirects to the archived page; with pooled
            # urllib3 requests the redirect is followed automatically, so
            # the final URL (when it contains /web/) is the snapshot.
            final_url = getattr(resp, "geturl", lambda: None)()
            headers = resp.headers if hasattr(resp, "headers") else {}
            if hasattr(headers, "get"):
                content_location = headers.get("Content-Location", "")
                link = headers.get("Link", "")
            else:
                content_location = ""
                link = ""
            # Drain the body so a pooled connection can be reused.
            resp.read()
        finally:
            # Always release the response, even if header inspection or
            # read() raises — otherwise the connection leaks.
            close = getattr(resp, "close", None)
            if close is not None:
                close()

        archived = _extract_archived_url(content_location, final_url, link)
        if archived:
            return {"url": archived}
        # Got a 200 but no archive URL anywhere; report success and point
        # at the Wayback index page for this URL instead.
        return {"url": f"https://web.archive.org/web/*/{url}"}

    except urllib.error.HTTPError as exc:
        if exc.code == 429:
            return {"error": "rate limited -- try again later"}
        if exc.code == 523:
            return {"error": "origin unreachable"}
        return {"error": f"HTTP {exc.code}"}
    except (TimeoutError, OSError) as exc:
        return {"error": f"timeout: {exc}"}
    except Exception as exc:  # defensive: report, never crash the executor
        return {"error": str(exc)[:100]}


def _extract_archived_url(content_location, final_url, link):
    """Pick the archived snapshot URL from response metadata, or None.

    Tries, in order of reliability: the Content-Location header, the
    post-redirect final URL, then ``Link`` header entries of the form
    ``<url>; rel="memento"``.
    """
    if content_location and "/web/" in content_location:
        # Content-Location may be server-relative; absolutize it.
        if content_location.startswith("/"):
            return f"https://web.archive.org{content_location}"
        return content_location

    if final_url and "/web/" in final_url:
        return final_url

    if link and "/web/" in link:
        for part in link.split(","):
            part = part.strip()
            if "/web/" in part and "<" in part:
                return part.split("<", 1)[1].split(">", 1)[0]

    return None
|
||||
|
||||
|
||||
@command("archive", help="Save to Wayback Machine: !archive <url>")
async def cmd_archive(bot, message):
    """Submit a URL to the Wayback Machine's Save Page Now service.

    Usage:
        !archive https://example.com/page
    """
    tokens = message.text.split(None, 1)
    if len(tokens) < 2:
        await bot.reply(message, "Usage: !archive <url>")
        return

    target = tokens[1].strip()
    if not target.startswith(("http://", "https://")):
        await bot.reply(message, "URL must start with http:// or https://")
        return

    await bot.reply(message, f"Archiving {target}...")

    # Save Page Now is a blocking HTTP call; hand it to the default
    # executor so the event loop stays responsive while it runs.
    outcome = await asyncio.get_running_loop().run_in_executor(
        None, _save_page, target
    )

    if "error" in outcome:
        await bot.reply(message, f"Archive failed: {outcome['error']}")
    else:
        await bot.reply(message, f"Archived: {outcome['url']}")
|
||||
Reference in New Issue
Block a user