From b11071e7f721781d068eb6aee921e35cd1a2b1c2 Mon Sep 17 00:00:00 2001 From: user Date: Sun, 15 Feb 2026 06:12:56 +0100 Subject: [PATCH] docs: add proxy pool documentation Update all docs for managed proxy pool: README, USAGE, CHEATSHEET, PROJECT, TASKS, and example config. Document multi-source config, proxy file format, health testing, persistence, and legacy compat. Co-Authored-By: Claude Opus 4.6 --- PROJECT.md | 11 +++--- README.md | 25 +++++++------ TASKS.md | 1 + config/example.yaml | 27 +++++++++++--- docs/CHEATSHEET.md | 25 ++++++++++--- docs/USAGE.md | 91 +++++++++++++++++++++++++++++++++++---------- src/s5p/pool.py | 21 +++++++---- 7 files changed, 145 insertions(+), 56 deletions(-) diff --git a/PROJECT.md b/PROJECT.md index 3d314ed..02af7c7 100644 --- a/PROJECT.md +++ b/PROJECT.md @@ -19,10 +19,11 @@ Client -------> s5p -------> Hop 1 -------> Hop 2 -------> Target SOCKS5 proto1 proto2 protoN ``` -- **server.py** -- asyncio SOCKS5 server, chain builder, bidirectional relay -- **proto.py** -- protocol handshake implementations (SOCKS5, SOCKS4/4a, HTTP CONNECT) -- **config.py** -- YAML config loading, proxy URL parsing -- **source.py** -- dynamic proxy source (HTTP API fetch, cache, random selection) +- **server.py** -- asyncio SOCKS5 server, bidirectional relay, signal handling +- **proto.py** -- protocol handshakes (SOCKS5, SOCKS4/4a, HTTP CONNECT), chain builder +- **config.py** -- YAML config loading, proxy URL parsing, pool config +- **pool.py** -- managed proxy pool (multi-source, health-tested, persistent) +- **source.py** -- legacy proxy source (single HTTP API, kept for backward compat) - **cli.py** -- argparse CLI, logging setup, cProfile support ## Deployment @@ -51,4 +52,4 @@ All other functionality uses Python stdlib (`asyncio`, `socket`, `struct`). 
- **Tor as a hop** -- no special Tor handling; it's just `socks5://127.0.0.1:9050` - **Graceful shutdown** -- SIGTERM/SIGINT handled in the event loop for clean container stops - **Config split** -- tracked example template, gitignored live config with real addresses -- **Proxy source** -- per-connection proxy rotation from HTTP API, cached with refresh +- **Proxy pool** -- multi-source (API + file), health-tested, persistent, auto-cleaned diff --git a/README.md b/README.md index 458b8e3..fe2357c 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,7 @@ through configurable chains of SOCKS4, SOCKS5, and HTTP CONNECT proxies. - Per-hop authentication (username/password) - DNS leak prevention (domain names forwarded to proxies, never resolved locally) - Tor integration (Tor is just another SOCKS5 hop) -- Dynamic proxy source: fetch proxies from an HTTP API, rotate per-connection +- Managed proxy pool: multiple sources (API + file), health-tested, auto-cleaned - Connection retry with proxy rotation (configurable attempts) - Connection metrics (logged periodically and on shutdown) - Container-ready (Alpine-based, podman/docker) @@ -59,16 +59,19 @@ cp config/example.yaml config/s5p.yaml ```yaml listen: 127.0.0.1:1080 timeout: 10 -retries: 3 # max attempts (proxy_source only) +retries: 3 chain: - socks5://127.0.0.1:9050 # Tor -proxy_source: - url: http://10.200.1.250:8081/proxies - proto: socks5 # optional filter - limit: 1000 - refresh: 300 # cache refresh (seconds) +proxy_pool: + sources: + - url: http://10.200.1.250:8081/proxies + proto: socks5 + - file: /etc/s5p/proxies.txt # one proxy URL per line + refresh: 300 # re-fetch interval (seconds) + test_interval: 120 # health test cycle (seconds) + max_fails: 3 # evict after N consecutive failures ``` `config/s5p.yaml` is gitignored; `config/example.yaml` is the tracked template. 
@@ -94,10 +97,10 @@ Options: ## How Chaining Works ``` -Client -> s5p -> [static chain] -> [random proxy from source] -> Destination +Client -> s5p -> [static chain] -> [random alive proxy from pool] -> Destination ``` s5p connects to Hop1 via TCP, negotiates the hop protocol (SOCKS5/4/HTTP), -then over that tunnel negotiates with Hop2, and so on. If a proxy source is -configured, a random proxy is appended to the chain per-connection. Each hop -only sees its immediate neighbors. +then over that tunnel negotiates with Hop2, and so on. If a proxy pool is +configured, a random health-tested proxy is appended to the chain per-connection. +Each hop only sees its immediate neighbors. diff --git a/TASKS.md b/TASKS.md index a38356c..0f4dc99 100644 --- a/TASKS.md +++ b/TASKS.md @@ -18,6 +18,7 @@ - [x] Dynamic proxy source API integration - [x] Connection retry with proxy rotation - [x] Connection metrics (periodic + shutdown logging) +- [x] Managed proxy pool (multi-source, health-tested, persistent) ## Next - [ ] Integration tests with mock proxy server diff --git a/config/example.yaml b/config/example.yaml index 6a32646..cfc876b 100644 --- a/config/example.yaml +++ b/config/example.yaml @@ -16,11 +16,26 @@ chain: # - socks4://proxy:1080 # post-Tor SOCKS4/4a proxy # - http://user:pass@proxy:8080 # post-Tor HTTP CONNECT proxy -# Dynamic proxy source -- appends a random proxy after the static chain. -# Fetches from an HTTP API and caches the list. +# Managed proxy pool -- fetches from multiple sources, health-tests, +# and rotates alive proxies per-connection after the static chain. 
+# proxy_pool: +# sources: +# - url: http://10.200.1.250:8081/proxies +# proto: socks5 # optional: filter by protocol +# country: US # optional: filter by country +# limit: 1000 # optional: max proxies to fetch +# - file: /etc/s5p/proxies.txt # text file, one proxy URL per line +# refresh: 300 # re-fetch sources interval (seconds) +# test_interval: 120 # health test cycle interval (seconds) +# test_url: http://httpbin.org/ip # URL for health checks +# test_timeout: 15 # per-test timeout (seconds) +# test_concurrency: 5 # parallel health tests +# max_fails: 3 # consecutive fails before eviction +# state_file: "" # empty = ~/.cache/s5p/pool.json + +# Legacy proxy source (still supported, auto-converts to proxy_pool): # proxy_source: # url: http://10.200.1.250:8081/proxies -# proto: socks5 # optional: filter by protocol -# country: US # optional: filter by country -# limit: 1000 # optional: max proxies to fetch -# refresh: 300 # cache refresh interval (seconds) +# proto: socks5 +# limit: 1000 +# refresh: 300 diff --git a/docs/CHEATSHEET.md b/docs/CHEATSHEET.md index 0a2cd37..5c440cd 100644 --- a/docs/CHEATSHEET.md +++ b/docs/CHEATSHEET.md @@ -32,14 +32,27 @@ make down # podman-compose down cp config/example.yaml config/s5p.yaml # create live config (gitignored) ``` -## Proxy Source (config) +## Proxy Pool (config) ```yaml -proxy_source: - url: http://10.200.1.250:8081/proxies - proto: socks5 # optional filter - limit: 1000 - refresh: 300 # seconds +proxy_pool: + sources: + - url: http://10.200.1.250:8081/proxies + proto: socks5 + limit: 1000 + - file: /etc/s5p/proxies.txt + refresh: 300 # re-fetch interval + test_interval: 120 # health test cycle + max_fails: 3 # evict after N fails +``` + +## Proxy File Format + +``` +# one proxy URL per line +socks5://1.2.3.4:1080 +socks5://user:pass@5.6.7.8:1080 +http://proxy.example.com:8080 ``` ## Proxy URLs diff --git a/docs/USAGE.md b/docs/USAGE.md index 99fb394..fffaa49 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -41,18 
+41,25 @@ cp config/example.yaml config/s5p.yaml ```yaml listen: 127.0.0.1:1080 timeout: 10 -retries: 3 # max attempts per connection (proxy_source only) +retries: 3 log_level: info chain: - socks5://127.0.0.1:9050 -proxy_source: - url: http://10.200.1.250:8081/proxies - proto: socks5 # optional: filter by protocol - country: US # optional: filter by country - limit: 1000 # max proxies to fetch - refresh: 300 # cache refresh interval (seconds) +proxy_pool: + sources: + - url: http://10.200.1.250:8081/proxies + proto: socks5 + limit: 1000 + - file: /etc/s5p/proxies.txt + refresh: 300 + test_interval: 120 + test_url: http://httpbin.org/ip + test_timeout: 15 + test_concurrency: 5 + max_fails: 3 + state_file: "" # empty = ~/.cache/s5p/pool.json ``` ## Proxy URL Format @@ -79,32 +86,76 @@ make down # stop and remove container Source (`./src`) and config (`./config/s5p.yaml`) are mounted read-only into the container. Edit locally, restart to pick up changes. -## Proxy Source +## Proxy Pool -Appends a random proxy from an HTTP API after the static chain on each -connection. Proxies are cached and refreshed at a configurable interval. +Managed proxy pool with multiple sources, health testing, and persistence. +Appends a random alive proxy after the static chain on each connection. 
```yaml -proxy_source: - url: http://10.200.1.250:8081/proxies - proto: socks5 # optional: only fetch this protocol - country: US # optional: only fetch this country - limit: 1000 # max proxies to fetch from API - refresh: 300 # re-fetch every 300 seconds +proxy_pool: + sources: + - url: http://10.200.1.250:8081/proxies + proto: socks5 # optional: filter by protocol + country: US # optional: filter by country + limit: 1000 # max proxies to fetch from API + - file: /etc/s5p/proxies.txt # text file, one proxy URL per line + refresh: 300 # re-fetch sources every 300 seconds + test_interval: 120 # health test cycle every 120 seconds + test_url: http://httpbin.org/ip # URL for health checks + test_timeout: 15 # per-test timeout (seconds) + test_concurrency: 5 # parallel health tests + max_fails: 3 # evict after N consecutive failures + state_file: "" # empty = ~/.cache/s5p/pool.json ``` -CLI shorthand (uses defaults for limit/refresh): +### Sources + +| Type | Config key | Description | +|------|-----------|-------------| +| HTTP API | `url` | JSON: `{"proxies": [{"proto": "socks5", "proxy": "host:port"}, ...]}` | +| Text file | `file` | One proxy URL per line, `#` comments, blank lines ignored | + +### Proxy file format + +``` +# Exit proxies +socks5://1.2.3.4:1080 +socks5://user:pass@5.6.7.8:1080 +http://proxy.example.com:8080 +``` + +### Health testing + +Each cycle tests all proxies through the full chain (static chain + proxy) +by sending an HTTP GET to `test_url`. Proxies are marked alive on any `2xx` response. +After `max_fails` consecutive failures, a proxy is evicted. + +Mass-failure guard: if >90% of tests fail in one cycle, eviction is skipped +(likely the static chain is broken, not the proxies). + +### Persistence + +Pool state is saved to `state_file` (default: `~/.cache/s5p/pool.json`) after +each refresh/health cycle and on shutdown. On startup, previously-alive proxies +are loaded for fast warm starts. 
+ +### CLI shorthand ```bash s5p -C socks5://127.0.0.1:9050 -S http://10.200.1.250:8081/proxies ``` -The API must return JSON: `{"proxies": [{"proto": "socks5", "proxy": "host:port"}, ...]}`. -Entries with `null` proto are skipped. +The `-S` flag creates a pool with a single API source (uses defaults for all +other pool settings). + +### Legacy config + +The old `proxy_source` key is still supported and auto-converts to `proxy_pool` +with a single API source. `proxy_pool` takes precedence if both are present. ## Connection Retry -When `proxy_source` is active, s5p retries failed connections with a different +When a proxy pool is active, s5p retries failed connections with a different random proxy. Controlled by the `retries` setting (default: 3). Static-only chains do not retry (retrying the same chain is pointless). diff --git a/src/s5p/pool.py b/src/s5p/pool.py index f383b4b..adb554f 100644 --- a/src/s5p/pool.py +++ b/src/s5p/pool.py @@ -11,7 +11,7 @@ import time import urllib.request from dataclasses import dataclass from pathlib import Path -from urllib.parse import urlencode +from urllib.parse import urlencode, urlparse from .config import ChainHop, PoolSourceConfig, ProxyPoolConfig, parse_proxy_url from .proto import ProtoError, build_chain @@ -198,20 +198,22 @@ class ProxyPool: async def _test_proxy(self, key: str, entry: ProxyEntry) -> bool: """Test a single proxy by building the full chain and sending HTTP GET.""" + parsed = urlparse(self._cfg.test_url) + host = parsed.hostname or "httpbin.org" + port = parsed.port or 80 + path = parsed.path or "/" + chain = self._chain + [entry.hop] entry.last_test = time.time() entry.tests += 1 try: reader, writer = await build_chain( - chain, "httpbin.org", 80, timeout=self._cfg.test_timeout, + chain, host, port, timeout=self._cfg.test_timeout, ) - except (ProtoError, TimeoutError, ConnectionError, OSError): + except (ProtoError, TimeoutError, ConnectionError, OSError, EOFError): return False try: - host = 
self._cfg.test_url.split("//", 1)[-1].split("/", 1)[0] - path = "/" + self._cfg.test_url.split("//", 1)[-1].split("/", 1)[-1] \ - if "/" in self._cfg.test_url.split("//", 1)[-1] else "/" request = f"GET {path} HTTP/1.1\r\nHost: {host}\r\nConnection: close\r\n\r\n" writer.write(request.encode()) await writer.drain() @@ -219,7 +221,7 @@ class ProxyPool: line = await asyncio.wait_for(reader.readline(), timeout=self._cfg.test_timeout) parts = line.decode("utf-8", errors="replace").split(None, 2) return len(parts) >= 2 and parts[1].startswith("2") - except (TimeoutError, ConnectionError, OSError): + except (TimeoutError, ConnectionError, OSError, EOFError): return False finally: try: @@ -238,7 +240,10 @@ class ProxyPool: async def _test(key: str, entry: ProxyEntry) -> None: async with sem: - results[key] = await self._test_proxy(key, entry) + try: + results[key] = await self._test_proxy(key, entry) + except Exception: + results[key] = False tasks = [_test(k, e) for k, e in list(self._proxies.items())] await asyncio.gather(*tasks)