feat: replace HTTP health check with TLS handshake

Replace _http_check (HTTP GET to httpbin.org) with _tls_check, which
performs a TLS handshake through the proxy chain. Rotating round-robin
across multiple targets (google, cloudflare, amazon) eliminates the
single point of failure. A bare handshake is lighter, faster, and harder
to block than an HTTP request.

- Add test_targets config field (replaces test_url)
- Backward compat: the hostname is extracted automatically from a legacy test_url
- Add ssl.create_default_context() and round-robin index to ProxyPool
- Update docs (example.yaml, USAGE.md, CHEATSHEET.md)
This commit is contained in:
user
2026-02-17 18:26:21 +01:00
parent 3638c607da
commit e78fc8dc3c
7 changed files with 230 additions and 38 deletions

View File

@@ -31,7 +31,10 @@ chain:
# - file: /etc/s5p/proxies.txt # text file, one proxy URL per line # - file: /etc/s5p/proxies.txt # text file, one proxy URL per line
# refresh: 300 # re-fetch sources interval (seconds) # refresh: 300 # re-fetch sources interval (seconds)
# test_interval: 120 # health test cycle interval (seconds) # test_interval: 120 # health test cycle interval (seconds)
# test_url: http://httpbin.org/ip # URL for health checks # test_targets: # TLS handshake targets (round-robin)
# - www.google.com
# - www.cloudflare.com
# - www.amazon.com
# test_timeout: 15 # per-test timeout (seconds) # test_timeout: 15 # per-test timeout (seconds)
# test_concurrency: 5 # parallel health tests # test_concurrency: 5 # parallel health tests
# max_fails: 3 # consecutive fails before eviction # max_fails: 3 # consecutive fails before eviction

View File

@@ -57,6 +57,10 @@ proxy_pool:
- file: /etc/s5p/proxies.txt - file: /etc/s5p/proxies.txt
refresh: 300 # re-fetch interval refresh: 300 # re-fetch interval
test_interval: 120 # health test cycle test_interval: 120 # health test cycle
test_targets: # TLS handshake targets (round-robin)
- www.google.com
- www.cloudflare.com
- www.amazon.com
max_fails: 3 # evict after N fails max_fails: 3 # evict after N fails
report_url: "" # POST dead proxies (optional) report_url: "" # POST dead proxies (optional)
``` ```

View File

@@ -59,7 +59,10 @@ proxy_pool:
- file: /etc/s5p/proxies.txt - file: /etc/s5p/proxies.txt
refresh: 300 refresh: 300
test_interval: 120 test_interval: 120
test_url: http://httpbin.org/ip test_targets: # TLS handshake targets (round-robin)
- www.google.com
- www.cloudflare.com
- www.amazon.com
test_timeout: 15 test_timeout: 15
test_concurrency: 5 test_concurrency: 5
max_fails: 3 max_fails: 3
@@ -107,7 +110,10 @@ proxy_pool:
- file: /etc/s5p/proxies.txt # text file, one proxy URL per line - file: /etc/s5p/proxies.txt # text file, one proxy URL per line
refresh: 300 # re-fetch sources every 300 seconds refresh: 300 # re-fetch sources every 300 seconds
test_interval: 120 # health test cycle every 120 seconds test_interval: 120 # health test cycle every 120 seconds
test_url: http://httpbin.org/ip # URL for health checks test_targets: # TLS handshake targets (round-robin)
- www.google.com
- www.cloudflare.com
- www.amazon.com
test_timeout: 15 # per-test timeout (seconds) test_timeout: 15 # per-test timeout (seconds)
test_concurrency: 5 # parallel health tests test_concurrency: 5 # parallel health tests
max_fails: 3 # evict after N consecutive failures max_fails: 3 # evict after N consecutive failures
@@ -134,8 +140,9 @@ http://proxy.example.com:8080
### Health testing ### Health testing
Each cycle tests all proxies through the full chain (static chain + proxy) Each cycle tests all proxies through the full chain (static chain + proxy)
by sending an HTTP GET to `test_url`. Proxies are marked alive on `200` response. by performing a TLS handshake against one of the `test_targets` (rotated
After `max_fails` consecutive failures, a proxy is evicted. round-robin). A successful handshake marks the proxy alive. After `max_fails`
consecutive failures, a proxy is evicted.
Before each health test cycle, the static chain is tested without any pool Before each health test cycle, the static chain is tested without any pool
proxy. If the chain itself is unreachable (e.g., Tor is down), proxy tests proxy. If the chain itself is unreachable (e.g., Tor is down), proxy tests

View File

@@ -45,13 +45,26 @@ class ProxyPoolConfig:
sources: list[PoolSourceConfig] = field(default_factory=list) sources: list[PoolSourceConfig] = field(default_factory=list)
refresh: float = 300.0 refresh: float = 300.0
test_interval: float = 120.0 test_interval: float = 120.0
test_url: str = "http://httpbin.org/ip" test_url: str = "" # deprecated, kept for backward compat
test_targets: list[str] = field(default_factory=lambda: [
"www.google.com",
"www.cloudflare.com",
"www.amazon.com",
])
test_timeout: float = 15.0 test_timeout: float = 15.0
test_concurrency: int = 5 test_concurrency: int = 5
max_fails: int = 3 max_fails: int = 3
state_file: str = "" state_file: str = ""
report_url: str = "" report_url: str = ""
def __post_init__(self) -> None:
    """Derive ``test_targets`` from a legacy ``test_url`` setting.

    Backward compat: when ``test_url`` is set and ``test_targets`` still
    equals the built-in default list, the URL's hostname becomes the sole
    TLS handshake target. A ``test_targets`` list that differs from the
    defaults always wins over ``test_url``.

    Scheme-less legacy values such as ``httpbin.org/ip`` or
    ``example.com:8080/x`` are accepted too. A value with no recognisable
    hostname leaves ``test_targets`` untouched.
    """
    defaults = ["www.google.com", "www.cloudflare.com", "www.amazon.com"]
    # Equality with the defaults is the only signal available here that the
    # caller did not pick targets; an explicit list equal to the defaults is
    # indistinguishable from "unset".
    if not self.test_url or self.test_targets != defaults:
        return

    host = urlparse(self.test_url).hostname
    if not host and "://" not in self.test_url:
        # urlparse() only populates the hostname after a "//" marker, so a
        # bare "host/path" needs one prepended. Values that already carry a
        # scheme are not retried, which keeps their previous outcome.
        try:
            host = urlparse(f"//{self.test_url}").hostname
        except ValueError:
            # Malformed netloc (e.g. unbalanced "["): keep the defaults, as
            # the single-pass parse did before.
            host = None
    if host:
        self.test_targets = [host]
@dataclass @dataclass
class TorConfig: class TorConfig:
@@ -199,17 +212,20 @@ def load_config(path: str | Path) -> Config:
limit=src.get("limit", 1000), limit=src.get("limit", 1000),
) )
) )
config.proxy_pool = ProxyPoolConfig( kwargs: dict = {
sources=sources, "sources": sources,
refresh=float(pool_raw.get("refresh", 300)), "refresh": float(pool_raw.get("refresh", 300)),
test_interval=float(pool_raw.get("test_interval", 120)), "test_interval": float(pool_raw.get("test_interval", 120)),
test_url=pool_raw.get("test_url", "http://httpbin.org/ip"), "test_url": pool_raw.get("test_url", ""),
test_timeout=float(pool_raw.get("test_timeout", 15)), "test_timeout": float(pool_raw.get("test_timeout", 15)),
test_concurrency=int(pool_raw.get("test_concurrency", 5)), "test_concurrency": int(pool_raw.get("test_concurrency", 5)),
max_fails=int(pool_raw.get("max_fails", 3)), "max_fails": int(pool_raw.get("max_fails", 3)),
state_file=pool_raw.get("state_file", ""), "state_file": pool_raw.get("state_file", ""),
report_url=pool_raw.get("report_url", ""), "report_url": pool_raw.get("report_url", ""),
) }
if "test_targets" in pool_raw:
kwargs["test_targets"] = list(pool_raw["test_targets"])
config.proxy_pool = ProxyPoolConfig(**kwargs)
elif "proxy_source" in raw: elif "proxy_source" in raw:
# backward compat: convert legacy proxy_source to proxy_pool # backward compat: convert legacy proxy_source to proxy_pool
src_raw = raw["proxy_source"] src_raw = raw["proxy_source"]

View File

@@ -7,10 +7,11 @@ import json
import logging import logging
import os import os
import random import random
import ssl
import time import time
from dataclasses import dataclass from dataclasses import dataclass
from pathlib import Path from pathlib import Path
from urllib.parse import urlencode, urlparse from urllib.parse import urlencode
from .config import ChainHop, PoolSourceConfig, ProxyPoolConfig, parse_api_proxies, parse_proxy_url from .config import ChainHop, PoolSourceConfig, ProxyPoolConfig, parse_api_proxies, parse_proxy_url
from .http import http_get_json, http_post_json from .http import http_get_json, http_post_json
@@ -62,6 +63,8 @@ class ProxyPool:
self._tasks: list[asyncio.Task] = [] self._tasks: list[asyncio.Task] = []
self._stop = asyncio.Event() self._stop = asyncio.Event()
self._state_path = self._resolve_state_path() self._state_path = self._resolve_state_path()
self._ssl_ctx = ssl.create_default_context()
self._target_idx = 0
# -- public interface ---------------------------------------------------- # -- public interface ----------------------------------------------------
@@ -234,48 +237,52 @@ class ProxyPool:
# -- health testing ------------------------------------------------------ # -- health testing ------------------------------------------------------
async def _http_check(self, chain: list[ChainHop]) -> bool: async def _tls_check(self, chain: list[ChainHop]) -> bool:
"""Send an HTTP GET through *chain* and return True on 2xx.""" """Perform a TLS handshake through *chain* and return True on success."""
parsed = urlparse(self._cfg.test_url) targets = self._cfg.test_targets
host = parsed.hostname or "httpbin.org" if not targets:
port = parsed.port or 80 return False
path = parsed.path or "/"
host = targets[self._target_idx % len(targets)]
self._target_idx += 1
try: try:
reader, writer = await build_chain( reader, writer = await build_chain(
chain, host, port, timeout=self._cfg.test_timeout, chain, host, 443, timeout=self._cfg.test_timeout,
) )
except (ProtoError, TimeoutError, ConnectionError, OSError, EOFError): except (ProtoError, TimeoutError, ConnectionError, OSError, EOFError):
return False return False
try: try:
request = f"GET {path} HTTP/1.1\r\nHost: {host}\r\nConnection: close\r\n\r\n" transport = writer.transport
writer.write(request.encode()) protocol = transport.get_protocol()
await writer.drain() loop = asyncio.get_running_loop()
new_transport = await asyncio.wait_for(
line = await asyncio.wait_for(reader.readline(), timeout=self._cfg.test_timeout) loop.start_tls(transport, protocol, self._ssl_ctx, server_hostname=host),
parts = line.decode("utf-8", errors="replace").split(None, 2) timeout=self._cfg.test_timeout,
return len(parts) >= 2 and parts[1].startswith("2") )
except (TimeoutError, ConnectionError, OSError, EOFError): new_transport.close()
return True
except (ssl.SSLError, TimeoutError, ConnectionError, OSError, EOFError):
return False return False
finally: finally:
try: try:
if not writer.is_closing():
writer.close() writer.close()
await writer.wait_closed()
except OSError: except OSError:
pass pass
async def _test_proxy(self, key: str, entry: ProxyEntry) -> bool: async def _test_proxy(self, key: str, entry: ProxyEntry) -> bool:
"""Test a single proxy by building the full chain and sending HTTP GET.""" """Test a single proxy via TLS handshake through the full chain."""
entry.last_test = time.time() entry.last_test = time.time()
entry.tests += 1 entry.tests += 1
return await self._http_check(self._chain + [entry.hop]) return await self._tls_check(self._chain + [entry.hop])
async def _test_chain(self) -> bool: async def _test_chain(self) -> bool:
"""Test the static chain without any pool proxy.""" """Test the static chain without any pool proxy."""
if not self._chain: if not self._chain:
return True return True
return await self._http_check(self._chain) return await self._tls_check(self._chain)
async def _run_health_tests(self, keys: list[str] | None = None) -> None: async def _run_health_tests(self, keys: list[str] | None = None) -> None:
"""Test proxies with bounded concurrency. """Test proxies with bounded concurrency.

View File

@@ -175,3 +175,40 @@ class TestConfig:
cfg_file.write_text("listen: 1080\n") cfg_file.write_text("listen: 1080\n")
c = load_config(cfg_file) c = load_config(cfg_file)
assert c.tor is None assert c.tor is None
def test_proxy_pool_test_targets(self, tmp_path):
    """Explicit ``test_targets`` from YAML are loaded as given; ``test_url`` stays empty."""
    yaml_path = tmp_path / "test.yaml"
    yaml_text = (
        "proxy_pool:\n"
        " sources: []\n"
        " test_targets:\n"
        " - host-a.example.com\n"
        " - host-b.example.com\n"
    )
    yaml_path.write_text(yaml_text)

    pool_cfg = load_config(yaml_path).proxy_pool

    assert pool_cfg is not None
    expected_targets = ["host-a.example.com", "host-b.example.com"]
    assert pool_cfg.test_targets == expected_targets
    assert pool_cfg.test_url == ""
def test_proxy_pool_legacy_test_url(self, tmp_path):
    """A legacy ``test_url`` in YAML is reduced to its hostname as the only target."""
    yaml_path = tmp_path / "test.yaml"
    yaml_text = (
        "proxy_pool:\n"
        " sources: []\n"
        " test_url: http://httpbin.org/ip\n"
    )
    yaml_path.write_text(yaml_text)

    pool_cfg = load_config(yaml_path).proxy_pool

    assert pool_cfg is not None
    assert pool_cfg.test_targets == ["httpbin.org"]
def test_proxy_pool_defaults(self, tmp_path):
    """With neither ``test_targets`` nor ``test_url`` set, the built-in targets apply."""
    yaml_path = tmp_path / "test.yaml"
    yaml_text = (
        "proxy_pool:\n"
        " sources: []\n"
    )
    yaml_path.write_text(yaml_text)

    pool_cfg = load_config(yaml_path).proxy_pool

    assert pool_cfg is not None
    expected_targets = ["www.google.com", "www.cloudflare.com", "www.amazon.com"]
    assert pool_cfg.test_targets == expected_targets

View File

@@ -1,6 +1,9 @@
"""Tests for the managed proxy pool.""" """Tests for the managed proxy pool."""
import asyncio
import ssl
import time import time
from unittest.mock import AsyncMock, MagicMock, patch
import pytest import pytest
@@ -468,3 +471,118 @@ class TestProxyPoolPersistence:
entry = pool2._proxies["socks5://1.2.3.4:1080"] entry = pool2._proxies["socks5://1.2.3.4:1080"]
assert entry.hop.username == "user" assert entry.hop.username == "user"
assert entry.hop.password == "pass" assert entry.hop.password == "pass"
class TestTlsCheck:
    """Exercise ``ProxyPool._tls_check`` with chain building and the event loop mocked."""

    def _make_pool(self, **kwargs):
        """Return a source-less ProxyPool whose config is built from *kwargs*."""
        return ProxyPool(ProxyPoolConfig(sources=[], **kwargs), [], timeout=10.0)

    def test_success(self):
        """A completed handshake returns True and closes the upgraded transport."""
        pool = self._make_pool(test_targets=["www.example.com"])

        protocol = MagicMock()
        transport = MagicMock()
        transport.get_protocol.return_value = protocol
        writer = MagicMock()
        writer.is_closing.return_value = False
        writer.transport = transport
        tls_transport = MagicMock()
        stream_pair = (MagicMock(), writer)

        with (
            patch("s5p.pool.build_chain", new_callable=AsyncMock, return_value=stream_pair),
            patch("asyncio.get_running_loop") as get_loop,
        ):
            fake_loop = MagicMock()
            fake_loop.start_tls = AsyncMock(return_value=tls_transport)
            get_loop.return_value = fake_loop
            outcome = asyncio.run(pool._tls_check([]))

        assert outcome is True
        fake_loop.start_tls.assert_called_once_with(
            transport, protocol, pool._ssl_ctx,
            server_hostname="www.example.com",
        )
        tls_transport.close.assert_called_once()

    def test_build_chain_failure(self):
        """A connection error while building the chain returns False."""
        pool = self._make_pool(test_targets=["www.example.com"])
        refused = ConnectionError("refused")

        with patch("s5p.pool.build_chain", new_callable=AsyncMock, side_effect=refused):
            outcome = asyncio.run(pool._tls_check([]))

        assert outcome is False

    def test_handshake_failure(self):
        """An ``ssl.SSLError`` raised by ``start_tls`` returns False."""
        pool = self._make_pool(test_targets=["www.example.com"])

        transport = MagicMock()
        transport.get_protocol.return_value = MagicMock()
        writer = MagicMock()
        writer.is_closing.return_value = False
        writer.transport = transport
        stream_pair = (MagicMock(), writer)

        with (
            patch("s5p.pool.build_chain", new_callable=AsyncMock, return_value=stream_pair),
            patch("asyncio.get_running_loop") as get_loop,
        ):
            fake_loop = MagicMock()
            fake_loop.start_tls = AsyncMock(
                side_effect=ssl.SSLError("handshake failed"),
            )
            get_loop.return_value = fake_loop
            outcome = asyncio.run(pool._tls_check([]))

        assert outcome is False

    def test_round_robin_rotation(self):
        """Successive checks walk the target list in order and wrap around."""
        targets = ["host-a.example.com", "host-b.example.com", "host-c.example.com"]
        pool = self._make_pool(test_targets=targets)
        seen: list[str] = []

        async def fake_build_chain(chain, host, port, timeout=None):
            # Record the chosen host, then fail so the check returns early.
            seen.append(host)
            raise ConnectionError("test")

        with patch("s5p.pool.build_chain", side_effect=fake_build_chain):
            for _ in range(6):
                asyncio.run(pool._tls_check([]))

        # Six checks over three targets: two full passes in list order.
        assert seen == targets * 2

    def test_empty_targets(self):
        """An empty target list returns False."""
        pool = self._make_pool(test_targets=[])
        assert asyncio.run(pool._tls_check([])) is False
class TestProxyPoolConfigCompat:
    """Check how ``ProxyPoolConfig`` reconciles legacy ``test_url`` with ``test_targets``."""

    def test_legacy_test_url_converts(self):
        """A lone ``test_url`` is converted to a single-hostname target list."""
        legacy_cfg = ProxyPoolConfig(test_url="http://httpbin.org/ip")
        assert legacy_cfg.test_targets == ["httpbin.org"]

    def test_explicit_test_targets_wins(self):
        """Explicit ``test_targets`` are kept even when ``test_url`` is also given."""
        options = {
            "test_url": "http://httpbin.org/ip",
            "test_targets": ["custom.example.com"],
        }
        mixed_cfg = ProxyPoolConfig(**options)
        assert mixed_cfg.test_targets == ["custom.example.com"]

    def test_defaults_when_neither_set(self):
        """With no arguments the built-in targets apply and ``test_url`` is empty."""
        default_cfg = ProxyPoolConfig()
        expected_targets = ["www.google.com", "www.cloudflare.com", "www.amazon.com"]
        assert default_cfg.test_targets == expected_targets
        assert default_cfg.test_url == ""