feat: route plugin HTTP traffic through SOCKS5 proxy

Add PySocks dependency and shared src/derp/http.py module providing
proxy-aware urlopen() and build_opener() that route through
socks5h://127.0.0.1:1080. Subclassed SocksiPyHandler passes SSL
context through to HTTPS connections.

Swapped 14 external-facing plugins to use the proxied helpers.
Local-only traffic (SearXNG, raw DNS/TLS sockets) stays direct.
Updated test mocks in test_twitch and test_alert accordingly.
This commit is contained in:
user
2026-02-15 15:53:49 +01:00
parent 10f62631be
commit 97bbc6a825
19 changed files with 151 additions and 47 deletions

View File

@@ -5,10 +5,10 @@ from __future__ import annotations
import asyncio import asyncio
import json import json
import re import re
import ssl
import urllib.request import urllib.request
from datetime import datetime, timezone from datetime import datetime, timezone
from derp.http import urlopen as _urlopen
from derp.plugin import command, event from derp.plugin import command, event
# -- Constants --------------------------------------------------------------- # -- Constants ---------------------------------------------------------------
@@ -106,8 +106,7 @@ def _search_youtube(keyword: str) -> list[dict]:
req = urllib.request.Request(_YT_SEARCH_URL, data=payload, method="POST") req = urllib.request.Request(_YT_SEARCH_URL, data=payload, method="POST")
req.add_header("Content-Type", "application/json") req.add_header("Content-Type", "application/json")
ctx = ssl.create_default_context() resp = _urlopen(req, timeout=_FETCH_TIMEOUT)
resp = urllib.request.urlopen(req, timeout=_FETCH_TIMEOUT, context=ctx)
raw = resp.read() raw = resp.read()
resp.close() resp.close()
@@ -141,8 +140,7 @@ def _search_twitch(keyword: str) -> list[dict]:
req.add_header("Client-Id", _GQL_CLIENT_ID) req.add_header("Client-Id", _GQL_CLIENT_ID)
req.add_header("Content-Type", "application/json") req.add_header("Content-Type", "application/json")
ctx = ssl.create_default_context() resp = _urlopen(req, timeout=_FETCH_TIMEOUT)
resp = urllib.request.urlopen(req, timeout=_FETCH_TIMEOUT, context=ctx)
raw = resp.read() raw = resp.read()
resp.close() resp.close()

View File

@@ -13,6 +13,7 @@ import urllib.request
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from datetime import datetime, timezone from datetime import datetime, timezone
from derp.http import urlopen as _urlopen
from derp.plugin import command from derp.plugin import command
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@@ -32,7 +33,7 @@ def fetch_crtsh(domain: str) -> list[dict]:
"""GET crt.sh JSON for a domain. Blocking.""" """GET crt.sh JSON for a domain. Blocking."""
url = _CRTSH_URL.format(domain=domain) url = _CRTSH_URL.format(domain=domain)
req = urllib.request.Request(url, headers={"User-Agent": "derp-irc-bot"}) req = urllib.request.Request(url, headers={"User-Agent": "derp-irc-bot"})
with urllib.request.urlopen(req, timeout=_CRTSH_TIMEOUT) as resp: with _urlopen(req, timeout=_CRTSH_TIMEOUT) as resp:
return json.loads(resp.read()) return json.loads(resp.read())

View File

@@ -8,6 +8,7 @@ import re
import time import time
from pathlib import Path from pathlib import Path
from derp.http import urlopen as _urlopen
from derp.plugin import command from derp.plugin import command
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@@ -121,7 +122,7 @@ async def _download_nvd() -> tuple[int, str]:
def _fetch(url): def _fetch(url):
req = urllib.request.Request(url, headers={"User-Agent": "derp-bot"}) req = urllib.request.Request(url, headers={"User-Agent": "derp-bot"})
with urllib.request.urlopen(req, timeout=120) as resp: # noqa: S310 with _urlopen(req, timeout=120) as resp:
return resp.read() return resp.read()
try: try:

View File

@@ -7,6 +7,7 @@ import logging
import time import time
from pathlib import Path from pathlib import Path
from derp.http import urlopen as _urlopen
from derp.plugin import command from derp.plugin import command
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@@ -85,7 +86,7 @@ async def _download_csv() -> tuple[int, str]:
def _fetch(): def _fetch():
req = urllib.request.Request(_CSV_URL, headers={"User-Agent": "derp-bot"}) req = urllib.request.Request(_CSV_URL, headers={"User-Agent": "derp-bot"})
with urllib.request.urlopen(req, timeout=60) as resp: # noqa: S310 with _urlopen(req, timeout=60) as resp:
return resp.read() return resp.read()
try: try:

View File

@@ -8,6 +8,7 @@ import re
import ssl import ssl
import urllib.request import urllib.request
from derp.http import build_opener as _build_opener
from derp.plugin import command from derp.plugin import command
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@@ -84,9 +85,7 @@ def _fetch_headers(url: str) -> tuple[dict[str, str], str]:
ctx.check_hostname = False ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE ctx.verify_mode = ssl.CERT_NONE
opener = urllib.request.build_opener( opener = _build_opener(context=ctx)
urllib.request.HTTPSHandler(context=ctx),
)
req = urllib.request.Request(url, method="GET") req = urllib.request.Request(url, method="GET")
req.add_header("User-Agent", _USER_AGENT) req.add_header("User-Agent", _USER_AGENT)

View File

@@ -7,6 +7,7 @@ import ssl
import time import time
import urllib.request import urllib.request
from derp.http import build_opener as _build_opener
from derp.plugin import command from derp.plugin import command
_TIMEOUT = 10 _TIMEOUT = 10
@@ -41,10 +42,7 @@ def _check(url: str) -> dict:
ctx.check_hostname = False ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE ctx.verify_mode = ssl.CERT_NONE
opener = urllib.request.build_opener( opener = _build_opener(NoRedirect, context=ctx)
NoRedirect,
urllib.request.HTTPSHandler(context=ctx),
)
req = urllib.request.Request(url, method="HEAD") req = urllib.request.Request(url, method="HEAD")
req.add_header("User-Agent", _USER_AGENT) req.add_header("User-Agent", _USER_AGENT)

View File

@@ -7,6 +7,7 @@ import logging
import time import time
from pathlib import Path from pathlib import Path
from derp.http import urlopen as _urlopen
from derp.plugin import command from derp.plugin import command
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@@ -111,7 +112,7 @@ async def _download_feeds() -> tuple[int, int]:
async def _fetch_one(filename: str, url: str) -> bool: async def _fetch_one(filename: str, url: str) -> bool:
def _do(): def _do():
req = urllib.request.Request(url, headers={"User-Agent": "derp-bot"}) req = urllib.request.Request(url, headers={"User-Agent": "derp-bot"})
with urllib.request.urlopen(req, timeout=30) as resp: # noqa: S310 with _urlopen(req, timeout=30) as resp:
return resp.read() return resp.read()
try: try:

View File

@@ -5,12 +5,12 @@ from __future__ import annotations
import asyncio import asyncio
import json import json
import re import re
import ssl
import urllib.request import urllib.request
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
from datetime import datetime, timezone from datetime import datetime, timezone
from urllib.parse import urlparse from urllib.parse import urlparse
from derp.http import urlopen as _urlopen
from derp.plugin import command, event from derp.plugin import command, event
# -- Constants --------------------------------------------------------------- # -- Constants ---------------------------------------------------------------
@@ -111,10 +111,8 @@ def _fetch_feed(url: str, etag: str = "", last_modified: str = "") -> dict:
if last_modified: if last_modified:
req.add_header("If-Modified-Since", last_modified) req.add_header("If-Modified-Since", last_modified)
ctx = ssl.create_default_context()
try: try:
resp = urllib.request.urlopen(req, timeout=_FETCH_TIMEOUT, context=ctx) resp = _urlopen(req, timeout=_FETCH_TIMEOUT)
result["status"] = resp.status result["status"] = resp.status
result["body"] = resp.read() result["body"] = resp.read()
result["etag"] = resp.headers.get("ETag", "") result["etag"] = resp.headers.get("ETag", "")

View File

@@ -12,6 +12,7 @@ import socket
import struct import struct
import urllib.request import urllib.request
from derp.http import urlopen as _urlopen
from derp.plugin import command from derp.plugin import command
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@@ -130,7 +131,7 @@ def _fetch_crtsh(domain: str) -> set[str]:
"""Fetch subdomains from crt.sh CT logs. Blocking.""" """Fetch subdomains from crt.sh CT logs. Blocking."""
url = _CRTSH_URL.format(domain=domain) url = _CRTSH_URL.format(domain=domain)
req = urllib.request.Request(url, headers={"User-Agent": "derp-bot"}) req = urllib.request.Request(url, headers={"User-Agent": "derp-bot"})
with urllib.request.urlopen(req, timeout=_CRTSH_TIMEOUT) as resp: # noqa: S310 with _urlopen(req, timeout=_CRTSH_TIMEOUT) as resp:
data = json.loads(resp.read()) data = json.loads(resp.read())
subs: set[str] = set() subs: set[str] = set()

View File

@@ -7,6 +7,7 @@ import logging
import time import time
from pathlib import Path from pathlib import Path
from derp.http import urlopen as _urlopen
from derp.plugin import command from derp.plugin import command
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@@ -66,7 +67,7 @@ async def _download_exits() -> int:
def _fetch(): def _fetch():
req = urllib.request.Request(_TOR_EXIT_URL, headers={"User-Agent": "derp-bot"}) req = urllib.request.Request(_TOR_EXIT_URL, headers={"User-Agent": "derp-bot"})
with urllib.request.urlopen(req, timeout=30) as resp: # noqa: S310 with _urlopen(req, timeout=30) as resp:
return resp.read().decode("utf-8", errors="replace") return resp.read().decode("utf-8", errors="replace")
try: try:

View File

@@ -5,10 +5,10 @@ from __future__ import annotations
import asyncio import asyncio
import json import json
import re import re
import ssl
import urllib.request import urllib.request
from datetime import datetime, timezone from datetime import datetime, timezone
from derp.http import urlopen as _urlopen
from derp.plugin import command, event from derp.plugin import command, event
# -- Constants --------------------------------------------------------------- # -- Constants ---------------------------------------------------------------
@@ -79,10 +79,8 @@ def _query_stream(login: str) -> dict:
req.add_header("Client-Id", _GQL_CLIENT_ID) req.add_header("Client-Id", _GQL_CLIENT_ID)
req.add_header("Content-Type", "application/json") req.add_header("Content-Type", "application/json")
ctx = ssl.create_default_context()
try: try:
resp = urllib.request.urlopen(req, timeout=_FETCH_TIMEOUT, context=ctx) resp = _urlopen(req, timeout=_FETCH_TIMEOUT)
raw = resp.read() raw = resp.read()
resp.close() resp.close()
data = json.loads(raw) data = json.loads(raw)

View File

@@ -17,6 +17,7 @@ import urllib.request
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass from dataclasses import dataclass
from derp.http import urlopen as _urlopen
from derp.plugin import command from derp.plugin import command
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@@ -94,7 +95,7 @@ def _http_get(url: str, timeout: int = _TIMEOUT) -> tuple[int, str]:
req = urllib.request.Request(url, headers={"User-Agent": _USER_AGENT}) req = urllib.request.Request(url, headers={"User-Agent": _USER_AGENT})
try: try:
with urllib.request.urlopen(req, timeout=timeout, context=ctx) as resp: with _urlopen(req, timeout=timeout, context=ctx) as resp:
body = resp.read().decode("utf-8", errors="replace") body = resp.read().decode("utf-8", errors="replace")
return resp.status, body return resp.status, body
except urllib.error.HTTPError as exc: except urllib.error.HTTPError as exc:

View File

@@ -7,6 +7,7 @@ import json
import logging import logging
import urllib.request import urllib.request
from derp.http import urlopen as _urlopen
from derp.plugin import command from derp.plugin import command
log = logging.getLogger(__name__) log = logging.getLogger(__name__)
@@ -28,7 +29,7 @@ def _lookup(url: str, timestamp: str = "") -> dict:
) )
try: try:
resp = urllib.request.urlopen(req, timeout=_TIMEOUT) resp = _urlopen(req, timeout=_TIMEOUT)
data = json.loads(resp.read().decode("utf-8")) data = json.loads(resp.read().decode("utf-8"))
resp.close() resp.close()
return data return data

View File

@@ -5,12 +5,12 @@ from __future__ import annotations
import asyncio import asyncio
import json import json
import re import re
import ssl
import urllib.request import urllib.request
import xml.etree.ElementTree as ET import xml.etree.ElementTree as ET
from datetime import datetime, timezone from datetime import datetime, timezone
from urllib.parse import urlparse from urllib.parse import urlparse
from derp.http import urlopen as _urlopen
from derp.plugin import command, event from derp.plugin import command, event
# -- Constants --------------------------------------------------------------- # -- Constants ---------------------------------------------------------------
@@ -97,9 +97,8 @@ def _resolve_channel(url: str) -> str | None:
""" """
req = urllib.request.Request(url, method="GET") req = urllib.request.Request(url, method="GET")
req.add_header("User-Agent", _BROWSER_UA) req.add_header("User-Agent", _BROWSER_UA)
ctx = ssl.create_default_context()
try: try:
resp = urllib.request.urlopen(req, timeout=_FETCH_TIMEOUT, context=ctx) resp = _urlopen(req, timeout=_FETCH_TIMEOUT)
body = resp.read(1_048_576) # Read up to 1MB body = resp.read(1_048_576) # Read up to 1MB
resp.close() resp.close()
except Exception: except Exception:
@@ -128,10 +127,8 @@ def _fetch_feed(url: str, etag: str = "", last_modified: str = "") -> dict:
if last_modified: if last_modified:
req.add_header("If-Modified-Since", last_modified) req.add_header("If-Modified-Since", last_modified)
ctx = ssl.create_default_context()
try: try:
resp = urllib.request.urlopen(req, timeout=_FETCH_TIMEOUT, context=ctx) resp = _urlopen(req, timeout=_FETCH_TIMEOUT)
result["status"] = resp.status result["status"] = resp.status
result["body"] = resp.read() result["body"] = resp.read()
result["etag"] = resp.headers.get("ETag", "") result["etag"] = resp.headers.get("ETag", "")

View File

@@ -10,6 +10,7 @@ requires-python = ">=3.11"
license = "MIT" license = "MIT"
dependencies = [ dependencies = [
"maxminddb>=2.0", "maxminddb>=2.0",
"PySocks>=1.7.1",
] ]
[project.scripts] [project.scripts]

48
src/derp/http.py Normal file
View File

@@ -0,0 +1,48 @@
"""Proxy-aware HTTP helpers -- routes outbound traffic through SOCKS5."""
import socket
import ssl
import urllib.request
from socks import SOCKS5
from sockshandler import SocksiPyConnectionS, SocksiPyHandler
_PROXY_ADDR = "127.0.0.1"
_PROXY_PORT = 1080
class _ProxyHandler(SocksiPyHandler, urllib.request.HTTPSHandler):
    """urllib handler that sends requests through the SOCKS5 proxy.

    An SSL context is stored on the handler and handed to every HTTPS
    connection it creates, so callers can supply their own context.
    """

    def __init__(self, context=None):
        """Store the SSL context and configure the SOCKS5 proxy endpoint.

        Args:
            context: SSL context to use for HTTPS connections. When
                omitted, ``ssl.create_default_context()`` is used.
        """
        self._ssl_context = context or ssl.create_default_context()
        # Positional args: proxy type, proxy address, proxy port, rdns flag.
        SocksiPyHandler.__init__(self, SOCKS5, _PROXY_ADDR, _PROXY_PORT, True)

    def https_open(self, req):
        """Open *req* over HTTPS through SOCKS5 using the stored SSL context."""

        def connect_via_proxy(
            host,
            port=None,
            timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
            **kwargs,
        ):
            # Extra keyword arguments are accepted but not forwarded.
            https_conn = SocksiPyConnectionS(
                *self.args,
                host=host,
                port=port,
                context=self._ssl_context,
                **self.kw,
            )
            # The timeout is assigned on the connection after construction.
            https_conn.timeout = timeout
            return https_conn

        return self.do_open(connect_via_proxy, req)
def urlopen(req, *, timeout=None, context=None):
    """Open *req* through the SOCKS5 proxy handler.

    Args:
        req: URL string or ``urllib.request.Request`` to open.
        timeout: Optional timeout passed to the opener; when ``None`` the
            opener is called without a timeout argument.
        context: Optional SSL context forwarded to the proxy handler.

    Returns:
        Whatever the opener's ``open()`` call returns.
    """
    opener = urllib.request.build_opener(_ProxyHandler(context=context))
    if timeout is None:
        return opener.open(req)
    return opener.open(req, timeout=timeout)
def build_opener(*handlers, context=None):
    """Build a urllib opener whose first handler is the SOCKS5 proxy handler.

    Args:
        *handlers: Extra handler classes or instances, passed after the
            proxy handler to ``urllib.request.build_opener``.
        context: Optional SSL context forwarded to the proxy handler.

    Returns:
        The opener produced by ``urllib.request.build_opener``.
    """
    return urllib.request.build_opener(
        _ProxyHandler(context=context),
        *handlers,
    )

View File

@@ -370,7 +370,7 @@ class TestExtractVideos:
def close(self): def close(self):
pass pass
with patch("urllib.request.urlopen", return_value=FakeResp()): with patch.object(_mod, "_urlopen", return_value=FakeResp()):
results = _search_youtube("test") results = _search_youtube("test")
assert len(results) == 1 assert len(results) == 1
assert results[0]["id"] == "dup1" assert results[0]["id"] == "dup1"
@@ -388,7 +388,7 @@ class TestSearchYoutube:
def close(self): def close(self):
pass pass
with patch("urllib.request.urlopen", return_value=FakeResp()): with patch.object(_mod, "_urlopen", return_value=FakeResp()):
results = _search_youtube("test query") results = _search_youtube("test query")
assert len(results) == 2 assert len(results) == 2
assert results[0]["id"] == "abc123" assert results[0]["id"] == "abc123"
@@ -396,7 +396,7 @@ class TestSearchYoutube:
def test_http_error_propagates(self): def test_http_error_propagates(self):
import pytest import pytest
with patch("urllib.request.urlopen", side_effect=ConnectionError("fail")): with patch.object(_mod, "_urlopen", side_effect=ConnectionError("fail")):
with pytest.raises(ConnectionError): with pytest.raises(ConnectionError):
_search_youtube("test") _search_youtube("test")
@@ -413,7 +413,7 @@ class TestSearchTwitch:
def close(self): def close(self):
pass pass
with patch("urllib.request.urlopen", return_value=FakeResp()): with patch.object(_mod, "_urlopen", return_value=FakeResp()):
results = _search_twitch("minecraft") results = _search_twitch("minecraft")
assert len(results) == 2 assert len(results) == 2
# Stream # Stream
@@ -435,7 +435,7 @@ class TestSearchTwitch:
def close(self): def close(self):
pass pass
with patch("urllib.request.urlopen", return_value=FakeResp()): with patch.object(_mod, "_urlopen", return_value=FakeResp()):
results = _search_twitch("nothing") results = _search_twitch("nothing")
assert results == [] assert results == []
@@ -448,13 +448,13 @@ class TestSearchTwitch:
def close(self): def close(self):
pass pass
with patch("urllib.request.urlopen", return_value=FakeResp()): with patch.object(_mod, "_urlopen", return_value=FakeResp()):
results = _search_twitch("bad") results = _search_twitch("bad")
assert results == [] assert results == []
def test_http_error_propagates(self): def test_http_error_propagates(self):
import pytest import pytest
with patch("urllib.request.urlopen", side_effect=ConnectionError("fail")): with patch.object(_mod, "_urlopen", side_effect=ConnectionError("fail")):
with pytest.raises(ConnectionError): with pytest.raises(ConnectionError):
_search_twitch("test") _search_twitch("test")
@@ -482,7 +482,7 @@ class TestSearchTwitch:
def close(self): def close(self):
pass pass
with patch("urllib.request.urlopen", return_value=FakeResp()): with patch.object(_mod, "_urlopen", return_value=FakeResp()):
results = _search_twitch("chat") results = _search_twitch("chat")
assert len(results) == 1 assert len(results) == 1
assert "()" not in results[0]["title"] assert "()" not in results[0]["title"]

59
tests/test_http.py Normal file
View File

@@ -0,0 +1,59 @@
"""Tests for the SOCKS5 proxy HTTP module."""
import ssl
import urllib.request
from socks import SOCKS5
from derp.http import _PROXY_ADDR, _PROXY_PORT, _ProxyHandler, build_opener
class TestProxyHandler:
    """Checks on how _ProxyHandler is configured at construction time."""

    def test_uses_socks5(self):
        proxy_type = _ProxyHandler().args[0]
        assert proxy_type == SOCKS5

    def test_proxy_address(self):
        proxy_args = _ProxyHandler().args
        assert proxy_args[1] == _PROXY_ADDR
        assert proxy_args[2] == _PROXY_PORT

    def test_rdns_enabled(self):
        rdns_flag = _ProxyHandler().args[3]
        assert rdns_flag is True

    def test_default_ssl_context(self):
        # With no context given, the handler must still hold an SSLContext.
        default_ctx = _ProxyHandler()._ssl_context
        assert isinstance(default_ctx, ssl.SSLContext)

    def test_custom_ssl_context(self):
        custom_ctx = ssl.create_default_context()
        custom_ctx.check_hostname = False
        # The very same object must be stored, not a copy.
        assert _ProxyHandler(context=custom_ctx)._ssl_context is custom_ctx

    def test_is_https_handler(self):
        assert isinstance(_ProxyHandler(), urllib.request.HTTPSHandler)
class TestBuildOpener:
    """Checks on the opener returned by the proxy-aware build_opener."""

    def test_includes_proxy_handler(self):
        installed = build_opener().handlers
        proxy_count = sum(1 for h in installed if isinstance(h, _ProxyHandler))
        assert proxy_count == 1

    def test_passes_extra_handlers(self):
        class Custom(urllib.request.HTTPRedirectHandler):
            pass

        installed = build_opener(Custom).handlers
        custom_count = sum(1 for h in installed if isinstance(h, Custom))
        assert custom_count == 1

    def test_passes_ssl_context(self):
        custom_ctx = ssl.create_default_context()
        custom_ctx.check_hostname = False
        installed = build_opener(context=custom_ctx).handlers
        # Pick the first proxy handler in the opener's handler list.
        proxy_handler = next(h for h in installed if isinstance(h, _ProxyHandler))
        assert proxy_handler._ssl_context is custom_ctx

View File

@@ -345,7 +345,7 @@ class TestQueryStream:
"""Test _query_stream response parsing with mocked HTTP.""" """Test _query_stream response parsing with mocked HTTP."""
def test_live_response(self): def test_live_response(self):
with patch("urllib.request.urlopen", return_value=_FakeGqlResp(GQL_LIVE)): with patch.object(_mod, "_urlopen", return_value=_FakeGqlResp(GQL_LIVE)):
result = _mod._query_stream("xqc") result = _mod._query_stream("xqc")
assert result["exists"] is True assert result["exists"] is True
assert result["live"] is True assert result["live"] is True
@@ -358,7 +358,7 @@ class TestQueryStream:
assert result["error"] == "" assert result["error"] == ""
def test_offline_response(self): def test_offline_response(self):
with patch("urllib.request.urlopen", return_value=_FakeGqlResp(GQL_OFFLINE)): with patch.object(_mod, "_urlopen", return_value=_FakeGqlResp(GQL_OFFLINE)):
result = _mod._query_stream("xqc") result = _mod._query_stream("xqc")
assert result["exists"] is True assert result["exists"] is True
assert result["live"] is False assert result["live"] is False
@@ -366,20 +366,20 @@ class TestQueryStream:
assert result["stream_id"] == "" assert result["stream_id"] == ""
def test_not_found_response(self): def test_not_found_response(self):
with patch("urllib.request.urlopen", return_value=_FakeGqlResp(GQL_NOT_FOUND)): with patch.object(_mod, "_urlopen", return_value=_FakeGqlResp(GQL_NOT_FOUND)):
result = _mod._query_stream("nobody") result = _mod._query_stream("nobody")
assert result["exists"] is False assert result["exists"] is False
assert result["live"] is False assert result["live"] is False
def test_no_game_response(self): def test_no_game_response(self):
with patch("urllib.request.urlopen", return_value=_FakeGqlResp(GQL_LIVE_NO_GAME)): with patch.object(_mod, "_urlopen", return_value=_FakeGqlResp(GQL_LIVE_NO_GAME)):
result = _mod._query_stream("streamer") result = _mod._query_stream("streamer")
assert result["exists"] is True assert result["exists"] is True
assert result["live"] is True assert result["live"] is True
assert result["game"] == "" assert result["game"] == ""
def test_network_error(self): def test_network_error(self):
with patch("urllib.request.urlopen", side_effect=Exception("timeout")): with patch.object(_mod, "_urlopen", side_effect=Exception("timeout")):
result = _mod._query_stream("xqc") result = _mod._query_stream("xqc")
assert result["error"] == "timeout" assert result["error"] == "timeout"
assert result["exists"] is False assert result["exists"] is False