feat: connection pooling via urllib3 + batch OG fetching

Replace per-request SOCKS5+TLS handshakes with urllib3 SOCKSProxyManager
connection pool (20 pools, 4 conns/host). Batch _fetch_og calls via
ThreadPoolExecutor to parallelize OG tag enrichment in alert polling.
Cache the flaskpaste SSL context at module level so it is built only once.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
user
2026-02-17 20:52:22 +01:00
parent e11994f320
commit 94f563d55a
8 changed files with 291 additions and 20 deletions

View File

@@ -330,6 +330,23 @@ def _fetch_og(url: str) -> tuple[str, str, str]:
return "", "", ""
def _fetch_og_batch(urls: list[str]) -> dict[str, tuple[str, str, str]]:
"""Fetch OG tags for multiple URLs concurrently.
Returns {url: (og_title, og_description, date)} for each input URL.
"""
from concurrent.futures import ThreadPoolExecutor, as_completed
if not urls:
return {}
results: dict[str, tuple[str, str, str]] = {}
with ThreadPoolExecutor(max_workers=min(len(urls), 8)) as pool:
futures = {pool.submit(_fetch_og, url): url for url in urls}
for fut in as_completed(futures):
results[futures[fut]] = fut.result()
return results
# -- YouTube InnerTube search (blocking) ------------------------------------
def _extract_videos(obj: object, depth: int = 0) -> list[dict]:
@@ -1753,26 +1770,41 @@ async def _poll_once(bot, key: str, announce: bool = True) -> None:
# Filter: only announce results that actually contain the keyword
# Check title/URL first, then fall back to og:title/og:description
kw_lower = keyword.lower()
# Collect URLs that need OG enrichment (batch fetch)
urls_needing_og: set[str] = set()
for item in new_items:
title_l = item.get("title", "").lower()
url_l = item.get("url", "").lower()
if kw_lower in title_l or kw_lower in url_l:
# Title/URL match -- only need OG for date enrichment
if not item.get("date") and item.get("url"):
urls_needing_og.add(item["url"])
elif item.get("url"):
# No title/URL match -- need OG for keyword fallback
urls_needing_og.add(item["url"])
og_cache: dict[str, tuple[str, str, str]] = {}
if urls_needing_og:
og_cache = await loop.run_in_executor(
None, _fetch_og_batch, list(urls_needing_og),
)
matched = []
for item in new_items:
title_l = item.get("title", "").lower()
url_l = item.get("url", "").lower()
if kw_lower in title_l or kw_lower in url_l:
# Fetch OG tags for date if backend didn't provide one
if not item.get("date") and item.get("url"):
_, _, og_date = await loop.run_in_executor(
None, _fetch_og, item["url"],
)
_, _, og_date = og_cache.get(item["url"], ("", "", ""))
if og_date:
item["date"] = og_date
matched.append(item)
continue
# Fetch OG tags for items that didn't match on title/URL
# Check OG tags for keyword match
item_url = item.get("url", "")
if item_url:
og_title, og_desc, og_date = await loop.run_in_executor(
None, _fetch_og, item_url,
)
og_title, og_desc, og_date = og_cache.get(item_url, ("", "", ""))
if (kw_lower in og_title.lower()
or kw_lower in og_desc.lower()):
if og_title and len(og_title) > len(item.get("title", "")):

View File

@@ -34,14 +34,23 @@ def _has_client_cert() -> bool:
return (_CERT_DIR / "derp.crt").exists() and (_CERT_DIR / "derp.key").exists()
_cached_ssl_ctx: ssl.SSLContext | None = None


def _ssl_context() -> ssl.SSLContext:
    """Build SSL context, loading client cert for mTLS if available.

    Cached at module level -- cert files are static at runtime, so the
    context (and any cert chain load) is constructed at most once.
    """
    global _cached_ssl_ctx
    if _cached_ssl_ctx is None:
        ctx = ssl.create_default_context()
        cert_path = _CERT_DIR / "derp.crt"
        key_path = _CERT_DIR / "derp.key"
        # mTLS is optional: attach the client cert only when both the
        # certificate and the private key are present on disk.
        if cert_path.exists() and key_path.exists():
            ctx.load_cert_chain(str(cert_path), str(key_path))
        _cached_ssl_ctx = ctx
    return _cached_ssl_ctx
def _solve_pow(nonce: str, difficulty: int) -> int: