feat: connection pooling via urllib3 + batch OG fetching
Replace per-request SOCKS5+TLS handshakes with a urllib3 SOCKSProxyManager connection pool (20 pools, 4 conns/host). Batch _fetch_og calls via ThreadPoolExecutor to parallelize OG tag enrichment in alert polling. Cache the flaskpaste SSL context at module level. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -330,6 +330,23 @@ def _fetch_og(url: str) -> tuple[str, str, str]:
|
||||
return "", "", ""
|
||||
|
||||
|
||||
def _fetch_og_batch(urls: list[str]) -> dict[str, tuple[str, str, str]]:
    """Fetch OG tags for multiple URLs concurrently.

    Each distinct URL is fetched once via ``_fetch_og`` on a thread pool of
    at most 8 workers.  If the fetch for a URL raises, that URL is mapped to
    ``("", "", "")`` (the empty triple ``_fetch_og`` falls back to), so a
    single bad URL cannot abort the whole batch.

    Args:
        urls: URLs to enrich; duplicates are collapsed before fetching.

    Returns:
        {url: (og_title, og_description, date)} for each input URL.
    """
    import logging
    from concurrent.futures import ThreadPoolExecutor, as_completed

    if not urls:
        return {}

    # Order-preserving de-duplication: never fetch the same URL twice.
    unique_urls = list(dict.fromkeys(urls))

    results: dict[str, tuple[str, str, str]] = {}
    with ThreadPoolExecutor(max_workers=min(len(unique_urls), 8)) as pool:
        futures = {pool.submit(_fetch_og, url): url for url in unique_urls}
        for fut in as_completed(futures):
            url = futures[fut]
            try:
                results[url] = fut.result()
            except Exception:
                # OG enrichment is best-effort: record the failure and keep
                # the results already gathered for the other URLs.
                logging.getLogger(__name__).debug(
                    "OG fetch failed for %s", url, exc_info=True,
                )
                results[url] = ("", "", "")
    return results
|
||||
|
||||
|
||||
# -- YouTube InnerTube search (blocking) ------------------------------------
|
||||
|
||||
def _extract_videos(obj: object, depth: int = 0) -> list[dict]:
|
||||
@@ -1753,26 +1770,41 @@ async def _poll_once(bot, key: str, announce: bool = True) -> None:
|
||||
# Filter: only announce results that actually contain the keyword
|
||||
# Check title/URL first, then fall back to og:title/og:description
|
||||
kw_lower = keyword.lower()
|
||||
|
||||
# Collect URLs that need OG enrichment (batch fetch)
|
||||
urls_needing_og: set[str] = set()
|
||||
for item in new_items:
|
||||
title_l = item.get("title", "").lower()
|
||||
url_l = item.get("url", "").lower()
|
||||
if kw_lower in title_l or kw_lower in url_l:
|
||||
# Title/URL match -- only need OG for date enrichment
|
||||
if not item.get("date") and item.get("url"):
|
||||
urls_needing_og.add(item["url"])
|
||||
elif item.get("url"):
|
||||
# No title/URL match -- need OG for keyword fallback
|
||||
urls_needing_og.add(item["url"])
|
||||
|
||||
og_cache: dict[str, tuple[str, str, str]] = {}
|
||||
if urls_needing_og:
|
||||
og_cache = await loop.run_in_executor(
|
||||
None, _fetch_og_batch, list(urls_needing_og),
|
||||
)
|
||||
|
||||
matched = []
|
||||
for item in new_items:
|
||||
title_l = item.get("title", "").lower()
|
||||
url_l = item.get("url", "").lower()
|
||||
if kw_lower in title_l or kw_lower in url_l:
|
||||
# Fetch OG tags for date if backend didn't provide one
|
||||
if not item.get("date") and item.get("url"):
|
||||
_, _, og_date = await loop.run_in_executor(
|
||||
None, _fetch_og, item["url"],
|
||||
)
|
||||
_, _, og_date = og_cache.get(item["url"], ("", "", ""))
|
||||
if og_date:
|
||||
item["date"] = og_date
|
||||
matched.append(item)
|
||||
continue
|
||||
# Fetch OG tags for items that didn't match on title/URL
|
||||
# Check OG tags for keyword match
|
||||
item_url = item.get("url", "")
|
||||
if item_url:
|
||||
og_title, og_desc, og_date = await loop.run_in_executor(
|
||||
None, _fetch_og, item_url,
|
||||
)
|
||||
og_title, og_desc, og_date = og_cache.get(item_url, ("", "", ""))
|
||||
if (kw_lower in og_title.lower()
|
||||
or kw_lower in og_desc.lower()):
|
||||
if og_title and len(og_title) > len(item.get("title", "")):
|
||||
|
||||
@@ -34,14 +34,23 @@ def _has_client_cert() -> bool:
|
||||
return (_CERT_DIR / "derp.crt").exists() and (_CERT_DIR / "derp.key").exists()
|
||||
|
||||
|
||||
# Module-level slot for the SSL context built by _ssl_context(); None until
# that function has stored a context here.
_cached_ssl_ctx: ssl.SSLContext | None = None
|
||||
|
||||
|
||||
def _ssl_context() -> ssl.SSLContext:
    """Build SSL context, loading client cert for mTLS if available.

    The context is built on the first call and stored in the module-level
    ``_cached_ssl_ctx``; every later call returns that same object.  Cert
    files are treated as static at runtime, so a ``derp.crt``/``derp.key``
    pair added or replaced after the first call is not picked up.

    Returns:
        The shared ``ssl.SSLContext``; it carries the client cert chain only
        when both ``derp.crt`` and ``derp.key`` exist under ``_CERT_DIR``.
    """
    global _cached_ssl_ctx
    if _cached_ssl_ctx is not None:
        return _cached_ssl_ctx

    ctx = ssl.create_default_context()
    cert_path = _CERT_DIR / "derp.crt"
    key_path = _CERT_DIR / "derp.key"
    # Load the client cert only when both halves of the pair are present.
    if cert_path.exists() and key_path.exists():
        ctx.load_cert_chain(str(cert_path), str(key_path))
    _cached_ssl_ctx = ctx
    return ctx
|
||||
|
||||
|
||||
def _solve_pow(nonce: str, difficulty: int) -> int:
|
||||
|
||||
Reference in New Issue
Block a user