diff --git a/src/s5p/config.py b/src/s5p/config.py index a15b532..3469162 100644 --- a/src/s5p/config.py +++ b/src/s5p/config.py @@ -9,6 +9,7 @@ from urllib.parse import urlparse import yaml DEFAULT_PORTS = {"socks5": 1080, "socks4": 1080, "http": 8080} +VALID_PROTOS = {"socks5", "socks4", "http"} @dataclass @@ -103,6 +104,27 @@ def parse_proxy_url(url: str) -> ChainHop: ) +def parse_api_proxies(data: dict) -> list[ChainHop]: + """Parse proxy list from API response ``{"proxies": [...]}``. + + Each entry must have ``proto`` (socks5/socks4/http) and ``proxy`` + (host:port). Invalid entries are silently skipped. + """ + proxies: list[ChainHop] = [] + for entry in data.get("proxies", []): + proto = entry.get("proto") + addr = entry.get("proxy", "") + if not proto or proto not in VALID_PROTOS or ":" not in addr: + continue + host, port_str = addr.rsplit(":", 1) + try: + port = int(port_str) + except ValueError: + continue + proxies.append(ChainHop(proto=proto, host=host, port=port)) + return proxies + + def load_config(path: str | Path) -> Config: """Load configuration from a YAML file.""" path = Path(path) diff --git a/src/s5p/pool.py b/src/s5p/pool.py index ca5a0ef..7db3df6 100644 --- a/src/s5p/pool.py +++ b/src/s5p/pool.py @@ -12,13 +12,12 @@ from dataclasses import dataclass from pathlib import Path from urllib.parse import urlencode, urlparse -from .config import ChainHop, PoolSourceConfig, ProxyPoolConfig, parse_proxy_url +from .config import ChainHop, PoolSourceConfig, ProxyPoolConfig, parse_api_proxies, parse_proxy_url from .http import http_get_json, http_post_json from .proto import ProtoError, build_chain logger = logging.getLogger("s5p") -VALID_PROTOS = {"socks5", "socks4", "http"} STATE_VERSION = 1 @@ -195,20 +194,7 @@ class ProxyPool: url = f"{url}{sep}{urlencode(params)}" data = await http_get_json(url) - - proxies: list[ChainHop] = [] - for entry in data.get("proxies", []): - proto = entry.get("proto") - addr = entry.get("proxy", "") - if not proto or proto not in VALID_PROTOS or ":" not in addr: - continue - host, port_str = addr.rsplit(":", 1) - try: - port = int(port_str) - except ValueError: - continue - proxies.append(ChainHop(proto=proto, host=host, port=port)) - return proxies + return parse_api_proxies(data) def _fetch_file_sync(self, src: PoolSourceConfig) -> list[ChainHop]: """Parse a text file with one proxy URL per line (runs in executor).""" diff --git a/src/s5p/source.py b/src/s5p/source.py index 8e7cbbd..12ca126 100644 --- a/src/s5p/source.py +++ b/src/s5p/source.py @@ -8,13 +8,11 @@ import random import time from urllib.parse import urlencode -from .config import ChainHop, ProxySourceConfig +from .config import ChainHop, ProxySourceConfig, parse_api_proxies from .http import http_get_json logger = logging.getLogger("s5p") -VALID_PROTOS = {"socks5", "socks4", "http"} class ProxySource: """Fetches and caches proxies from an HTTP API. @@ -76,18 +74,4 @@ class ProxySource: url = f"{url}{sep}{urlencode(params)}" data = await http_get_json(url) - - proxies: list[ChainHop] = [] - for entry in data.get("proxies", []): - proto = entry.get("proto") - addr = entry.get("proxy", "") - if not proto or proto not in VALID_PROTOS or ":" not in addr: - continue - host, port_str = addr.rsplit(":", 1) - try: - port = int(port_str) - except ValueError: - continue - proxies.append(ChainHop(proto=proto, host=host, port=port)) - - return proxies + return parse_api_proxies(data)