feat: connection pooling via urllib3 + batch OG fetching

Replace per-request SOCKS5+TLS handshakes with urllib3 SOCKSProxyManager
connection pool (20 pools, 4 conns/host). Batch _fetch_og calls via
ThreadPoolExecutor to parallelize OG tag enrichment in alert polling.
Cache flaskpaste SSL context at module level.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
user
2026-02-17 20:52:22 +01:00
parent e11994f320
commit 94f563d55a
8 changed files with 291 additions and 20 deletions

View File

@@ -5,19 +5,50 @@ import logging
import socket
import ssl
import time
import urllib.error
import urllib.request
import socks
import urllib3
from socks import SOCKS5
from sockshandler import SocksiPyConnectionS, SocksiPyHandler
from urllib3.contrib.socks import SOCKSProxyManager
# Address and port of the local SOCKS5 proxy that the connection pool targets.
_PROXY_ADDR = "127.0.0.1"
_PROXY_PORT = 1080
# Default number of attempts urlopen() makes when the caller passes no
# ``retries`` value.
_MAX_RETRIES = 3
# NOTE(review): this assignment is overwritten by the one directly below it;
# it looks like the pre-change line of the diff -- confirm it is absent from
# the real file.
_RETRY_ERRORS = (ssl.SSLError, ConnectionError, TimeoutError, OSError)
# Exception types that urlopen() treats as transient and retries with
# exponential backoff; urllib3's base HTTPError is included because the
# pooled path raises urllib3 exceptions.
_RETRY_ERRORS = (
ssl.SSLError, ConnectionError, TimeoutError, OSError,
urllib3.exceptions.HTTPError,
)
# Module-level logger named after this module.
_log = logging.getLogger(__name__)
# -- Connection pool (urllib3) ------------------------------------------------
# Shared SOCKS5 proxy manager; stays None until _get_pool() builds it.
_pool: SOCKSProxyManager | None = None
# Allow redirects (up to 10) but no urllib3-level retries (we retry ourselves
# in urlopen()).
#
# raise_on_status=False matters because the status budget is 0: a 413/429/503
# response that carries a Retry-After header would otherwise exhaust that
# budget immediately and surface as urllib3's MaxRetryError.  Returning the
# response instead lets urlopen() turn it into urllib.error.HTTPError like
# every other >= 400 status.
_POOL_RETRIES = urllib3.Retry(
    total=10, connect=0, read=0, redirect=10, status=0, other=0,
    raise_on_status=False,
)
def _get_pool() -> SOCKSProxyManager:
    """Return the module-wide SOCKS5 proxy manager, building it on first use."""
    global _pool
    # Fast path: the manager already exists, hand it back unchanged.
    if _pool is not None:
        return _pool
    proxy_url = f"socks5h://{_PROXY_ADDR}:{_PROXY_PORT}/"
    _pool = SOCKSProxyManager(
        proxy_url,
        num_pools=20,
        maxsize=4,
        retries=_POOL_RETRIES,
    )
    return _pool
# -- Legacy opener (for build_opener / context= callers) ---------------------
# Module-level slot for a urllib OpenerDirector; starts as None.
# NOTE(review): presumably filled in by _get_opener(), which is defined
# outside the visible hunk -- confirm.
_default_opener: urllib.request.OpenerDirector | None = None
@@ -52,12 +83,66 @@ class _ProxyHandler(SocksiPyHandler, urllib.request.HTTPSHandler):
return self.do_open(build, req)
# -- Public HTTP interface ---------------------------------------------------
def urlopen(req, *, timeout=None, context=None, retries=None):
    """Proxy-aware drop-in for urllib.request.urlopen.

    Uses connection pooling via urllib3 for default requests.
    Falls back to legacy opener for custom SSL context.
    Retries on transient SSL/connection errors with exponential backoff.

    Args:
        req: Either a URL string (sent as a header-less GET) or a
            ``urllib.request.Request`` whose URL, headers, body and method
            are forwarded to the pool.
        timeout: Total timeout in seconds; a falsy value selects 30 seconds.
        context: Optional SSL context. When given, the request is delegated
            to ``_urlopen_legacy`` instead of the pool.
        retries: Maximum number of attempts; defaults to ``_MAX_RETRIES``.
            On the pooled path at least one attempt is always made.

    Returns:
        The response from the pool, opened with ``preload_content=False``
        (the body has not been read yet), or whatever ``_urlopen_legacy``
        returns when ``context`` is given.

    Raises:
        urllib.error.HTTPError: The response status was >= 400. The error
            body is readable through the exception (``exc.read()``).
        Exception: The last error from ``_RETRY_ERRORS`` once all attempts
            have been used up.
    """
    # Function-scope import: the module's import block is not fully visible
    # here, so the dependency is kept local to the block that needs it.
    import io

    max_retries = retries if retries is not None else _MAX_RETRIES

    # Custom SSL context -> fall back to opener (rare: username.py only)
    if context is not None:
        return _urlopen_legacy(
            req, timeout=timeout, context=context, retries=max_retries,
        )

    # Default path: pooled urllib3
    pool = _get_pool()

    if isinstance(req, str):
        url, headers, body, method = req, {}, None, "GET"
    else:
        url = req.full_url
        headers = dict(req.header_items())
        body = req.data
        method = req.get_method()

    to = urllib3.Timeout(total=timeout or 30)

    # A zero or negative retry count used to skip the loop entirely and
    # return None without sending anything; always try at least once.
    attempts = max(1, max_retries)

    for attempt in range(attempts):
        try:
            resp = pool.request(
                method, url,
                headers=headers,
                body=body,
                timeout=to,
                preload_content=False,
            )
            if resp.status >= 400:
                # Drain body so connection returns to pool, then raise
                # urllib.error.HTTPError for backward compatibility.  The
                # drained bytes are attached so callers can still inspect
                # the error payload.
                payload = resp.read()
                raise urllib.error.HTTPError(
                    url, resp.status, resp.reason or "",
                    resp.headers, io.BytesIO(payload),
                )
            return resp
        except urllib.error.HTTPError:
            # HTTPError is an OSError subclass; re-raise it before the
            # transient-error handler below can swallow and retry it.
            raise
        except _RETRY_ERRORS as exc:
            if attempt + 1 >= attempts:
                raise
            delay = 2 ** attempt
            _log.debug("urlopen retry %d/%d after %s: %s",
                       attempt + 1, max_retries, type(exc).__name__, exc)
            time.sleep(delay)
def _urlopen_legacy(req, *, timeout=None, context=None, retries=None):
"""Open URL through legacy opener (custom SSL context)."""
max_retries = retries if retries is not None else _MAX_RETRIES
opener = _get_opener(context)
kwargs = {}
if timeout is not None:
@@ -82,6 +167,8 @@ def build_opener(*handlers, context=None):
return urllib.request.build_opener(proxy, *handlers)
# -- Raw TCP helpers (unchanged) ---------------------------------------------
def create_connection(address, *, timeout=None):
"""SOCKS5-proxied drop-in for socket.create_connection.