Files
ppf/config.py
Username 9b44043237
All checks were successful
CI / syntax-check (push) Successful in 3s
CI / memory-leak-check (push) Successful in 11s
add ssl_first: try SSL handshake before secondary check
When ssl_first=1 (default), proxy validation first attempts an SSL
handshake. If the handshake fails, it falls back to the configured
secondary check (head, judges, or irc). This separates SSL capability
detection from basic connectivity testing.

New config options:
- ssl_first: enable SSL-first pattern (default: 1)
- checktype: secondary check type (head, judges, irc)
2025-12-28 14:56:46 +01:00

165 lines
11 KiB
Python

from comboparse import ComboParser
from misc import set_log_level, _log
import os
class Config(ComboParser):
def load(self):
    """Load the base configuration, then derive runtime values.

    Splits the comma-separated Tor host list, scales the per-host
    watchd thread count by the number of Tor hosts, reads the server
    list file, splits the secondary check types, and applies the
    -q/-v CLI flags to the log level.
    """
    super(Config, self).load()
    # tor_hosts is a comma-separated "host:port" list
    self.torhosts = [str(host).strip() for host in self.common.tor_hosts.split(',')]
    # threads config is a per-host value; multiply by Tor host count
    self.watchd.threads *= len(self.torhosts)
    # one server/URL per line; skip blank lines
    with open(self.watchd.source_file, 'r') as src:
        self.servers = [line.strip() for line in src if line.strip()]
    # checktype may name several secondary checks, comma-separated
    self.watchd.checktypes = [part.strip() for part in self.watchd.checktype.split(',') if part.strip()]
    # CLI flags override the log level; --quiet wins over --verbose
    if self.args.quiet:
        set_log_level('warn')
    elif self.args.verbose:
        set_log_level('debug')
def validate(self):
    """Validate configuration values. Returns list of errors."""
    problems = []
    notes = []
    # httpd must listen on a valid TCP port
    port = self.httpd.port
    if port < 1 or port > 65535:
        problems.append('httpd.port must be 1-65535, got %d' % port)
    # Timeouts must all be strictly positive
    for value, label in ((self.common.timeout_connect, 'common.timeout_connect'),
                         (self.common.timeout_read, 'common.timeout_read'),
                         (self.watchd.timeout, 'watchd.timeout'),
                         (self.ppf.timeout, 'ppf.timeout')):
        if value <= 0:
            problems.append('%s must be > 0' % label)
    # Thread counts: 0 is allowed for watchd (disables local testing)
    if self.watchd.threads < 0:
        problems.append('watchd.threads must be >= 0')
    for value, label in ((self.ppf.threads, 'ppf.threads'),
                         (self.scraper.threads, 'scraper.threads')):
        if value < 1:
            problems.append('%s must be >= 1' % label)
    # Failure limits must allow at least one failure
    for value, label in ((self.watchd.max_fail, 'watchd.max_fail'),
                         (self.ppf.max_fail, 'ppf.max_fail')):
        if value < 1:
            problems.append('%s must be >= 1' % label)
    # Secondary check types (ssl is handled separately by ssl_first)
    allowed = {'irc', 'head', 'judges'}
    for name in self.watchd.checktypes:
        if name not in allowed:
            problems.append('watchd.checktype "%s" invalid, must be one of: %s' % (name, ', '.join(sorted(allowed))))
    if not self.watchd.checktypes:
        problems.append('watchd.checktype must specify at least one valid type')
    # Unknown engine names are only warned about, not fatal
    known_engines = {'duckduckgo', 'startpage', 'brave', 'ecosia',
                     'mojeek', 'qwant', 'yandex', 'github', 'gitlab',
                     'codeberg', 'gitea', 'searx'}
    for raw in self.scraper.engines.split(','):
        engine = raw.strip().lower()
        if engine and engine not in known_engines:
            notes.append('unknown engine: %s' % engine)
    # Missing source_file is a warning (it may appear later)
    if not os.path.exists(self.watchd.source_file):
        notes.append('source_file not found: %s' % self.watchd.source_file)
    # Database directories must be writable up front
    for db_path in (self.watchd.database, self.ppf.database):
        directory = os.path.dirname(db_path) or '.'
        if not os.access(directory, os.W_OK):
            problems.append('database directory not writable: %s' % directory)
    # Emit collected warnings; only hard errors are returned
    for note in notes:
        _log(note, 'warn')
    return problems
def __init__(self):
super(Config, self).__init__('config.ini')
section = 'common'
self.add_item(section, 'tor_hosts', str, '127.0.0.1:9050', 'comma-separated list of tor proxy address(es)', True)
self.add_item(section, 'timeout_connect', int, 10, 'connection timeout in seconds (default: 10)', False)
self.add_item(section, 'timeout_read', int, 15, 'read timeout in seconds (default: 15)', False)
self.add_item(section, 'profiling', bool, False, 'enable cProfile profiling (default: False)', False)
section = 'watchd'
self.add_item(section, 'outage_threshold', float, 4.0, 'mininum success percentage required to not drop check results', False)
self.add_item(section, 'max_fail', int, 5, 'number of fails after which a proxy is considered dead', False)
self.add_item(section, 'threads', int, 10, 'number of threads watchd uses to check proxies', True)
self.add_item(section, 'min_threads', int, 0, 'minimum threads (0 = auto: threads/4)', False)
self.add_item(section, 'timeout', int, 15, 'timeout for blocking operations (connect/recv/...) for proxy checks in seconds', False)
self.add_item(section, 'timeout_fail_inc', float, 1.5, 'extra timeout per failure (default: 1.5)', False)
self.add_item(section, 'timeout_fail_max', float, 15, 'max extra timeout for failures (default: 15)', False)
self.add_item(section, 'submit_after', int, 200, 'min. number of tested proxies for DB write', False)
self.add_item(section, 'debug', bool, False, 'whether to print additional debug info', False)
self.add_item(section, 'checktime', int, 1800, 'base checking interval for proxies in db in seconds', False)
self.add_item(section, 'perfail_checktime', int, 3600, 'additional checking interval for proxies in db in seconds per experienced failure', False)
self.add_item(section, 'database', str, 'proxies.sqlite', 'filename of database', True)
self.add_item(section, 'oldies', bool, False, 're-test old proxies as well ? (default: False)', False)
self.add_item(section, 'oldies_checktime', int, 43200, 'base checking interval for *old* proxies in seconds (default: 43200)', False)
self.add_item(section, 'oldies_multi', int, 10, 'fetch threads*multi rows when testing oldies (default: 10)', False)
self.add_item(section, 'source_file', str, 'servers.txt', 'server/url list to read from (default: servers.txt)', False)
self.add_item(section, 'stale_days', int, 30, 'days after which dead proxies are removed (default: 30)', False)
self.add_item(section, 'stats_interval', int, 300, 'seconds between status reports (default: 300)', False)
self.add_item(section, 'tor_safeguard', bool, True, 'enable tor safeguard (default: True)', False)
self.add_item(section, 'checktype', str, 'head', 'secondary check type: irc, head, judges (used when ssl_first fails)', False)
self.add_item(section, 'ssl_first', bool, True, 'try SSL handshake first, fallback to checktype on failure (default: True)', False)
self.add_item(section, 'scale_cooldown', int, 10, 'seconds between thread scaling decisions (default: 10)', False)
self.add_item(section, 'scale_threshold', float, 10.0, 'min success rate % to scale up threads (default: 10.0)', False)
section = 'httpd'
self.add_item(section, 'listenip', str, '127.0.0.1', 'address for the httpd to listen to (default: 127.0.0.1)', True)
self.add_item(section, 'port', int, 8081, 'port for the httpd to listen to (default: 8081)', True)
self.add_item(section, 'enabled', bool, False, 'start httpd (default: False)', True)
section = 'ppf'
self.add_item(section, 'debug', bool, False, 'whether to print additional debug info', False)
self.add_item(section, 'search', bool, True, 'whether to use searx search engine to find new proxy lists', False)
self.add_item(section, 'timeout', float, 15, 'timeout for blocking operations (connect/recv/...) for proxy checks in seconds', False)
self.add_item(section, 'http_retries', int, 1, 'number of retries for http connects', False)
self.add_item(section, 'threads', int, 1, 'number of threads to run (default: 1)', False)
self.add_item(section, 'checktime', int, 3600, 'base checking interval for urls in db in seconds', False)
self.add_item(section, 'perfail_checktime', int, 3600, 'additional checking interval for urls in db in seconds per resultless check', False)
self.add_item(section, 'max_fail', int, 5, 'number of fails after which an url is considered dead', False)
self.add_item(section, 'database', str, 'proxies.sqlite', 'filename of database', True)
self.add_item(section, 'extract_samedomain', bool, False, 'extract only url from same domains? (default: False)', False)
self.add_item(section, 'list_max_age_days', int, 7, 'max age in days for proxy list URLs (default: 7)', False)
section = 'scraper'
self.add_item(section, 'enabled', bool, True, 'enable search engine scraper (default: True)', False)
self.add_item(section, 'threads', int, 3, 'number of scraper threads (default: 3)', False)
self.add_item(section, 'debug', bool, False, 'scraper: whether to print additional debug info', False)
self.add_item(section, 'query', str, 'psw', 'build query using Proxies, Search, Websites', False)
self.add_item(section, 'backoff_base', int, 30, 'base backoff delay in seconds (default: 30)', False)
self.add_item(section, 'backoff_max', int, 3600, 'max backoff delay in seconds (default: 3600)', False)
self.add_item(section, 'fail_threshold', int, 2, 'consecutive failures before backoff (default: 2)', False)
self.add_item(section, 'engines', str, 'searx,duckduckgo,github', 'comma-separated search engines (default: searx,duckduckgo,github)', False)
self.add_item(section, 'max_pages', int, 5, 'max pages to fetch per engine query (default: 5)', False)
self.add_item(section, 'libretranslate_url', str, 'https://lt.mymx.me/translate', 'LibreTranslate API URL (default: https://lt.mymx.me/translate)', False)
self.add_item(section, 'libretranslate_enabled', bool, False, 'enable LibreTranslate for dynamic translations (default: False)', False)
section = 'worker'
self.add_item(section, 'batch_size', int, 100, 'proxies per work batch (default: 100)', False)
self.add_item(section, 'heartbeat', int, 60, 'heartbeat interval in seconds (default: 60)', False)
self.add_item(section, 'claim_timeout', int, 300, 'seconds before unclaimed work is released (default: 300)', False)
self.aparser.add_argument("--file", help="import a single file containing proxy addrs", type=str, default='', required=False)
self.aparser.add_argument("--nobs", help="disable BeautifulSoup, use stdlib HTMLParser", action='store_true', default=False)
self.aparser.add_argument("-q", "--quiet", help="suppress info messages, show warnings and errors only", action='store_true', default=False)
self.aparser.add_argument("-v", "--verbose", help="show debug messages", action='store_true', default=False)
self.aparser.add_argument("--profile", help="enable cProfile profiling, output to profile.stats", action='store_true', default=False)
self.aparser.add_argument("--worker", help="run as worker node", action='store_true', default=False)
self.aparser.add_argument("--server", help="master server URL (e.g., https://master:8081)", type=str, default='')
self.aparser.add_argument("--worker-key", help="worker authentication key", type=str, default='')
self.aparser.add_argument("--register", help="register as worker with master server", action='store_true', default=False)
self.aparser.add_argument("--worker-name", help="worker name for registration (default: hostname)", type=str, default='')